framework,version,device,op_name,kernel_source,gemm_dtype,m,n,k,latency
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3584,4.303232192993164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,4096,4.852223873138428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,5120,5.984543800354004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2560,3.184767961502075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,7168,8.289407730102539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3072,3.7479679584503174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2048,2.6050240993499756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,512,0.8970239758491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,256,0.5102720260620117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,8192,9.571136474609375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1024,1.4685440063476562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,128,0.3980160057544708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1536,2.044032096862793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,10240,12.480544090270996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,8192,2.39849591255188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,10240,3.077280044555664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,12288,15.09552001953125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,12288,3.8214080333709717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,7168,2.105151891708374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,5120,1.531391978263855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,4096,1.241312026977539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3584,1.094688057899475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3072,0.9541760087013245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2560,0.8175359964370728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,16384,4.628799915313721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1024,0.3782080113887787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,512,0.23916800320148468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,256,0.1297920048236847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,128,0.09737599641084671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1536,0.5218240022659302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2048,0.6666880249977112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,7168,1.6928319931030273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,8192,1.898848056793213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,16384,20.895488739013672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,10240,2.7272000312805176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,12288,3.566175937652588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,4096,0.9439039826393127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,5120,1.1511039733886719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3584,0.82915198802948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3072,0.7196159958839417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,512,0.1847040057182312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2560,0.6145920157432556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1024,0.2908799946308136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2048,0.5077760219573975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1536,0.3997119963169098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,128,0.07667200267314911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,256,0.10409600287675858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,16384,3.4717440605163574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,7168,1.348512053489685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,8192,1.4987200498580933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,10240,1.8512959480285645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,12288,2.214816093444824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,4096,0.7830399870872498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,5120,0.9678720235824585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3584,0.6968960165977478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3072,0.6051200032234192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,16384,2.9975359439849854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2560,0.5167679786682129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2048,0.42819198966026306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1536,0.33452799916267395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1024,0.2473600059747696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,128,0.0655680000782013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,256,0.08832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,512,0.15503999590873718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,8192,1.225823998451233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,10240,1.524672031402588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,12288,1.817855954170227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,5120,0.7867839932441711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,7168,1.0810879468917847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,16384,2.42524790763855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,4096,0.6419519782066345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2048,0.3471679985523224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2560,0.4199039936065674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3072,0.4933759868144989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3584,0.5703359842300415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,512,0.1263359934091568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1024,0.19791999459266663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,256,0.07020799815654755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1536,0.2731519937515259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,128,0.05488000065088272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,10240,1.3369280099868774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,12288,1.5942399501800537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,16384,2.2779839038848877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,65536,22.149023056030273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,7168,0.9557759761810303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,8192,1.0856000185012817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,5120,0.6932799816131592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,4096,0.5640320181846619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3584,0.49935999512672424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3072,0.4347519874572754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2560,0.37145599722862244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1024,0.17535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1536,0.24063999950885773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,512,0.11219199746847153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2048,0.30511999130249023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,256,0.06592000275850296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,128,0.0498879998922348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,65536,16.118976593017578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,8192,0.7756159901618958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,10240,0.9547520279884338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,12288,1.1396160125732422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,65536,13.477919578552246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,16384,1.8287999629974365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,5120,0.49641600251197815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,4096,0.40508800745010376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3584,0.3593280017375946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,7168,0.6866239905357361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1024,0.12774400413036346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1536,0.17497600615024567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2048,0.21952000260353088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2560,0.26524800062179565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3072,0.3134720027446747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,256,0.05040000006556511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,512,0.0828159973025322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,128,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,65536,10.86627197265625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,8192,0.6186879873275757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,10240,0.775871992111206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,12288,0.9145600199699402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,7168,0.5480960011482239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,5120,0.3999679982662201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,4096,0.32815998792648315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,16384,1.2009600400924683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3584,0.2895680069923401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3072,0.2502079904079437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2560,0.2141440063714981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1024,0.104032002389431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,512,0.06777600198984146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2048,0.17788800597190857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1536,0.1401599943637848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,256,0.040352001786231995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,128,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,65536,9.699263572692871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,8192,0.5424320101737976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,10240,0.670527994632721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,12288,0.7997120022773743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,5120,0.3479999899864197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,4096,0.2858240008354187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,16384,1.049056053161621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,7168,0.4780479967594147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3584,0.25411200523376465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1024,0.09257599711418152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1536,0.12508800625801086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3072,0.22092799842357635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2048,0.1574079990386963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2560,0.18915200233459473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,512,0.06092799827456474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,256,0.038176000118255615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,128,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,8192,0.46649599075317383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,10240,0.5773760080337524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,65536,6.864607810974121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,12288,0.6874880194664001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,16384,0.9050559997558594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3584,0.21660800278186798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,5120,0.30239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,4096,0.2470719963312149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,7168,0.41206398606300354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1024,0.08140800148248672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1536,0.10831999778747559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2048,0.1353600025177002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3072,0.19123199582099915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2560,0.1627199947834015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,512,0.05331199988722801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,256,0.0344959981739521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,128,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,65536,5.771520137786865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,8192,0.38998401165008545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,10240,0.48159998655319214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,12288,0.5743039846420288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,5120,0.25279998779296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,4096,0.20665599405765533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,16384,0.7508800029754639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3584,0.18432000279426575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,7168,0.3444159924983978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1024,0.06908799707889557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3072,0.15993599593639374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1536,0.09164799749851227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2048,0.11494400352239609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2560,0.137472003698349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,128,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,256,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,512,0.04588799923658371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,65536,4.759136199951172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,8192,0.3136320114135742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,10240,0.3877440094947815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,12288,0.4598720073699951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,7168,0.2770879864692688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,5120,0.2035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,16384,0.6020799875259399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,4096,0.16595199704170227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3584,0.1475200057029724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3072,0.12905600666999817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1536,0.07568000257015228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1024,0.0568000003695488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2560,0.11110399663448334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2048,0.09417600184679031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,256,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,128,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,512,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,65536,4.080383777618408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,8192,0.23772799968719482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,10240,0.29254400730133057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,12288,0.34700798988342285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,7168,0.21091200411319733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,5120,0.15408000349998474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,16384,0.4554559886455536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,4096,0.12697599828243256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3584,0.1138560026884079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3072,0.09974399954080582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1536,0.059007998555898666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2560,0.08575999736785889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2048,0.07327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1024,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,512,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,256,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,128,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,65536,3.4396800994873047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,8192,0.16047999262809753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,10240,0.1985280066728592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,12288,0.2361920028924942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,7168,0.14313599467277527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,5120,0.10592000186443329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,16384,0.3073920011520386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,4096,0.08771199733018875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3584,0.07862400263547897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1024,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1536,0.04095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2048,0.05177599936723709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2560,0.05993599817156792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3072,0.06921599805355072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,512,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,128,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,256,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,65536,2.8593599796295166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,8192,0.08588799834251404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,10240,0.1045759990811348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,12288,0.1225920021533966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,7168,0.07628799974918365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,5120,0.05804799869656563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,16384,0.16019199788570404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,4096,0.049536000937223434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3584,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2048,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3072,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2560,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1536,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1024,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,256,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,128,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,512,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,65536,2.084127902984619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,8192,0.054816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,10240,0.06521599739789963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,12288,0.07628799974918365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,7168,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,5120,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,16384,0.09811200201511383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,4096,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3584,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3072,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2048,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1536,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2560,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1024,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,512,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,128,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,256,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,65536,1.3804160356521606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,8192,0.04521600157022476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,10240,0.052032001316547394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,12288,0.060256000608205795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,7168,0.04163200035691261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,5120,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,16384,0.07625599950551987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,4096,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3584,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3072,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,512,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1024,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2560,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2048,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1536,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,256,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,65536,0.6814720034599304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,7168,4.136096000671387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,8192,4.6993279457092285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,65536,0.36393600702285767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3584,2.159264087677002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,5120,3.001568078994751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,4096,2.441983938217163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3072,1.88755202293396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1536,1.034783959388733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2048,1.3203519582748413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,65536,0.3025600016117096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,128,0.20473599433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2560,1.6071679592132568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,512,0.4676159918308258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,256,0.2468159943819046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1024,0.7411199808120728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,8192,1.2163200378417969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,12288,1.801759958267212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,10240,5.812128067016602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,16384,2.3861119747161865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,4096,0.6411200165748596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3584,0.5663359761238098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,5120,0.786624014377594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,12288,6.965695858001709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,10240,1.5073599815368652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,7168,1.0808320045471191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1024,0.19788800179958344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1536,0.2712959945201874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2560,0.42022401094436646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2048,0.34534400701522827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,512,0.1263040006160736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3072,0.4922559857368469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,256,0.06921599805355072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,128,0.05289600044488907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,16384,1.7924480438232422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,7168,0.8129600286483765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,8192,0.9250879883766174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,10240,1.1469440460205078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,12288,1.3636480569839478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,5120,0.5937920212745667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,16384,9.19923210144043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,4096,0.48419201374053955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3584,0.43033599853515625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3072,0.3736000061035156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,512,0.0960640013217926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2560,0.31916800141334534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1024,0.1526080071926117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1536,0.2061759978532791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2048,0.26335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,256,0.05737600103020668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,128,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,7168,0.6785280108451843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,8192,0.7724800109863281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,10240,0.9553920030593872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,12288,1.1397440433502197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,4096,0.4060800075531006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,5120,0.4955199956893921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3584,0.3591359853744507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3072,0.31494399905204773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,16384,1.502560019493103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2048,0.21955199539661407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2560,0.2677760124206543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1536,0.17414399981498718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,512,0.08313599973917007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1024,0.12831999361515045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,128,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,256,0.048895999789237976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,8192,0.6188160181045532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,10240,0.76528000831604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,12288,0.9138240218162537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,16384,1.200063943862915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,4096,0.32576000690460205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,5120,0.3996480107307434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,7168,0.5454080104827881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2048,0.1767359972000122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2560,0.2141440063714981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3072,0.2499839961528778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3584,0.2900800108909607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,256,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,512,0.06639999896287918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1024,0.10505600273609161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1536,0.14057600498199463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,128,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,10240,0.671455979347229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,12288,0.8002880215644836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,16384,1.0532480478286743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,65536,10.735648155212402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,7168,0.4789760112762451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,8192,0.544160008430481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,65536,8.003328323364258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,4096,0.28723201155662537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,5120,0.35120001435279846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2560,0.1884479969739914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3072,0.22060799598693848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3584,0.252128005027771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1536,0.1250240057706833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2048,0.15644800662994385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,256,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,512,0.05721599981188774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1024,0.09292799979448318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,128,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,65536,6.2600321769714355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,8192,0.39103999733924866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,10240,0.4832960069179535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,12288,0.5758079886436462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,7168,0.3457280099391937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,5120,0.2535040080547333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,4096,0.20800000429153442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,16384,0.7509440183639526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3584,0.18400000035762787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1024,0.06918399780988693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2560,0.13740800321102142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3072,0.16169600188732147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1536,0.09238400310277939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2048,0.11443199962377548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,65536,5.659840106964111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,256,0.02908799983561039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,512,0.04646399989724159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,128,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,7168,0.2788800001144409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,8192,0.31648001074790955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,10240,0.38835200667381287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,12288,0.46275201439857483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3584,0.14815999567508698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,4096,0.16704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,5120,0.2046079933643341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,16384,0.6039999723434448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3072,0.13158400356769562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1536,0.07568000257015228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2048,0.09471999853849411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1024,0.057440001517534256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2560,0.11193600296974182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,512,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,256,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,128,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,65536,4.671999931335449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,8192,0.28118398785591125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,12288,0.4047999978065491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,10240,0.34009599685668945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,7168,0.24848000705242157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,5120,0.18719999492168427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,16384,0.5284159779548645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3584,0.1391039937734604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,4096,0.15593600273132324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3072,0.12303999811410904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1536,0.07513599842786789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1024,0.05225599929690361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2560,0.10847999900579453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2048,0.09123200178146362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,512,0.03379200026392937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,256,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,128,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,65536,3.339200019836426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,10240,0.29203200340270996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,8192,0.2385919988155365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,12288,0.35097599029541016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,5120,0.15529599785804749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,7168,0.21084800362586975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,4096,0.12835200130939484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,16384,0.45494401454925537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3584,0.1143679991364479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3072,0.10060799866914749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2560,0.08707199990749359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2048,0.07360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1024,0.044863998889923096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1536,0.05907199904322624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,512,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,256,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,128,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,65536,2.3729279041290283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,8192,0.2075520008802414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,10240,0.2535359859466553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,12288,0.296640008687973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,7168,0.18291200697422028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,16384,0.38284799456596375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,5120,0.1391039937734604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3584,0.10604800283908844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,4096,0.11628799885511398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3072,0.09401600062847137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,512,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1024,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1536,0.058848001062870026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2560,0.08364800363779068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2048,0.07129599899053574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,256,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,128,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,65536,2.0197761058807373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,8192,0.16035200655460358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,12288,0.2375359982252121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,10240,0.19891199469566345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,7168,0.14313599467277527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,16384,0.30774399638175964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,5120,0.10611200332641602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3072,0.07039999961853027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,4096,0.08867199718952179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3584,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,512,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1024,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2560,0.061184000223875046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1536,0.04355200007557869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2048,0.052191998809576035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,256,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,128,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,65536,1.7701120376586914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,8192,0.13065600395202637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,7168,0.11718399822711945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,12288,0.1841920018196106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,10240,0.15804800391197205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,16384,0.23763200640678406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,4096,0.07769600301980972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3584,0.07097599655389786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3072,0.06339199841022491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,5120,0.09151999652385712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2560,0.056063998490571976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,512,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1024,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2048,0.04956800118088722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1536,0.040352001786231995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,256,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,128,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,65536,1.4592000246047974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,7168,0.07606399804353714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,8192,0.08486399799585342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,12288,0.1212799996137619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,10240,0.10345599800348282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,4096,0.048608001321554184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,16384,0.1573439985513687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3072,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,5120,0.05756799876689911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3584,0.04428799822926521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1024,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,512,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1536,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2048,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2560,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,256,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,128,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,65536,1.1818560361862183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,7168,0.0459199994802475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,8192,0.04995200037956238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,12288,0.06972800195217133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,10240,0.060127999633550644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,16384,0.08950400352478027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,4096,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,5120,0.03612799942493439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3072,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3584,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1024,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,512,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2048,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1536,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2560,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,65536,0.8707200288772583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,7168,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,8192,0.0382080003619194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,12288,0.052352000027894974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,10240,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3584,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,5120,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3072,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,16384,0.06428799778223038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,4096,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,512,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1536,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1024,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,65536,0.6286399960517883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2560,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2048,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,8192,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,7168,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,10240,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,12288,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,4096,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,5120,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3584,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,16384,0.054655998945236206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3072,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,65536,0.3487679958343506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2560,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,512,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2048,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1536,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,65536,0.19644799828529358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,5120,1.5162880420684814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,7168,2.077631950378418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,65536,0.16924799978733063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,4096,1.238495945930481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,8192,2.3622400760650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2560,0.8046079874038696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1536,0.5199679732322693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1024,0.37913599610328674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3584,1.0915199518203735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,512,0.23472000658512115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,256,0.13065600395202637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2048,0.6649600267410278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,128,0.10313600301742554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,10240,2.9191360473632812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3072,0.9524480104446411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,12288,3.483328104019165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,8192,0.6176319718360901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,7168,0.547104001045227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,10240,0.7566720247268677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,5120,0.3999679982662201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,12288,0.9106559753417969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3584,0.28758400678634644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3072,0.251008003950119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2560,0.21305599808692932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,4096,0.32499200105667114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2048,0.177279993891716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,512,0.061535999178886414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1536,0.14150400459766388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1024,0.10367999970912933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,256,0.03968000039458275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,16384,1.1966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,128,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,16384,4.647488117218018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,5120,0.3022719919681549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,7168,0.4111039936542511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,8192,0.4670720100402832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,10240,0.5731199979782104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,4096,0.2451840043067932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,12288,0.6878719925880432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3584,0.21955199539661407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3072,0.19033600389957428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1536,0.10835199803113937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2560,0.16233600676059723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2048,0.13468800485134125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,512,0.04966399818658829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1024,0.08054400235414505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,256,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,16384,0.9012479782104492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,128,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,8192,0.3873920142650604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,7168,0.34406399726867676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,10240,0.48089599609375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,12288,0.5720000267028809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,5120,0.2526719868183136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,4096,0.20742399990558624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2560,0.13743999600410461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3584,0.18387199938297272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3072,0.16118399798870087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,16384,0.7510079741477966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2048,0.11459200084209442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1536,0.09200000017881393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,512,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,256,0.02864000014960766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1024,0.07004799693822861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,128,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,10240,0.3845120072364807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,8192,0.31331199407577515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,12288,0.4607039988040924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,7168,0.2781440019607544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,16384,0.6018880009651184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,5120,0.20281599462032318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,4096,0.16543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3072,0.1297599971294403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3584,0.14812800288200378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2560,0.11100800335407257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2048,0.09302400052547455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,512,0.03561599925160408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1536,0.07513599842786789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1024,0.05552000179886818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,256,0.024032000452280045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,128,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,65536,4.68828821182251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,16384,0.525056004524231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,12288,0.40569600462913513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,10240,0.3421440124511719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,65536,3.5133121013641357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,8192,0.28035199642181396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,4096,0.15513600409030914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3584,0.1385280042886734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,5120,0.1863359957933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,7168,0.24908800423145294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3072,0.12291199713945389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1024,0.051711998879909515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1536,0.07353600114583969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2048,0.09043200314044952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,512,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2560,0.10716799646615982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,256,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,128,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,65536,2.9305920600891113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,12288,0.29526400566101074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,8192,0.20479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,10240,0.2486719936132431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,7168,0.18329599499702454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,16384,0.3807680010795593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3584,0.10470400005578995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3072,0.09359999746084213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,4096,0.11648000031709671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,5120,0.13894400000572205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2560,0.08271999657154083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1536,0.054368000477552414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1024,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,65536,2.3456320762634277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,512,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2048,0.07043199986219406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,128,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,256,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,10240,0.19750399887561798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,8192,0.1608320027589798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,7168,0.14348800480365753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,12288,0.235167995095253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,16384,0.3076159954071045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,4096,0.08767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,5120,0.1064319983124733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3072,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3584,0.0790719985961914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2560,0.0607680007815361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2048,0.051392000168561935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1536,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,512,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,128,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1024,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,256,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,65536,2.3186240196228027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,12288,0.23235200345516205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,8192,0.16128000617027283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,10240,0.19779199361801147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,7168,0.14259199798107147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,5120,0.10623999685049057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,4096,0.08870399743318558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,16384,0.30768001079559326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3584,0.07840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2560,0.060127999633550644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3072,0.06947200000286102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1024,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,512,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2048,0.05110400170087814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1536,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,256,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,128,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,65536,1.4400639533996582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,7168,0.11801599711179733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,8192,0.13177600502967834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,12288,0.18531200289726257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,10240,0.1579200029373169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3072,0.06371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,16384,0.23766399919986725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,4096,0.07769600301980972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3584,0.07091200351715088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,5120,0.09151999652385712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2560,0.05571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,65536,1.1780799627304077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2048,0.04604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,512,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1024,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1536,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,256,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,128,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,10240,0.13471999764442444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,8192,0.1133119985461235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,7168,0.10124800354242325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,12288,0.15849600732326508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3584,0.06015999987721443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,16384,0.20175999402999878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,4096,0.06566400080919266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3072,0.05564799904823303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,65536,1.1773439645767212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,5120,0.0785600021481514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,512,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1536,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1024,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,256,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2560,0.04944000020623207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2048,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,65536,0.872767984867096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,128,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,5120,0.057760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,10240,0.10473600029945374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,8192,0.08483199775218964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,7168,0.0764160007238388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,12288,0.12163200229406357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3072,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3584,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,4096,0.04831999912858009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2048,0.030208000913262367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2560,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,16384,0.15833599865436554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,512,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1536,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1024,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,8192,0.08422400057315826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,7168,0.07612799853086472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,10240,0.10310400277376175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,12288,0.12064000219106674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,5120,0.05801599845290184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3584,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,4096,0.048576001077890396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,16384,0.15484799444675446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3072,0.03888000175356865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2560,0.03455999866127968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1536,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1024,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2048,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,512,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,65536,0.7354879975318909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,256,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,10240,0.06159999966621399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,12288,0.07603199779987335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,8192,0.061983998864889145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,7168,0.04614400118589401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,16384,0.10793600231409073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3584,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3072,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,4096,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,5120,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,65536,0.593824028968811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2560,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1024,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2048,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1536,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,256,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,65536,0.5576959848403931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,8192,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,10240,0.04604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,5120,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,7168,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,12288,0.052480001002550125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2560,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,4096,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,16384,0.06252799928188324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2048,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3584,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3072,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1536,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1024,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,7168,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,8192,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,12288,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,10240,0.036896001547575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,65536,0.31040000915527344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,16384,0.04540799930691719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,4096,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3584,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,5120,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3072,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2560,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2048,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1024,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,7168,0.02703999914228916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,8192,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,10240,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,12288,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,65536,0.17423999309539795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,5120,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,16384,0.03791999816894531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3072,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3584,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2560,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,4096,0.021663999184966087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2048,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1536,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,65536,0.11635199934244156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,5120,0.7830719947814941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,4096,0.6366080045700073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,16384,2.378943920135498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3584,0.565280020236969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,65536,0.09600000083446503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3072,0.49769601225852966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,7168,1.0759999752044678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2048,0.34652799367904663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1024,0.188960000872612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,8192,1.2161600589752197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,256,0.07248000055551529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2560,0.4193919897079468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,512,0.10851199924945831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1536,0.2702080011367798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,128,0.054368000477552414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,10240,1.5179200172424316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,10240,0.3835200071334839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,12288,1.8064639568328857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,7168,0.2791999876499176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,8192,0.3132160007953644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,5120,0.20403200387954712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,12288,0.46457600593566895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,4096,0.16662399470806122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3072,0.12960000336170197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3584,0.14870400726795197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2560,0.11110399663448334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,512,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1536,0.0748480036854744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1024,0.05350400134921074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2048,0.09350399672985077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,16384,0.6045119762420654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,128,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,256,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,12288,0.3457919955253601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,5120,0.15491199493408203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,10240,0.29235199093818665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,7168,0.2101760059595108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,8192,0.23574399948120117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3072,0.100832000374794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3584,0.11363200098276138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,4096,0.12636800110340118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2048,0.07199999690055847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2560,0.08569599688053131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1536,0.059039998799562454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,512,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1024,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,16384,0.45552000403404236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,256,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,128,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,65536,2.348191976547241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,12288,0.2931840121746063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,5120,0.13795199990272522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,7168,0.1847359985113144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,8192,0.2046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,10240,0.2500480115413666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,4096,0.11228799819946289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3584,0.09948799759149551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2560,0.07398399710655212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3072,0.08528000116348267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2048,0.06185600161552429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,16384,0.3814080059528351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1536,0.050175998359918594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,128,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1024,0.037248000502586365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,512,0.025087999179959297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,256,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,7168,0.14246399700641632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,12288,0.2338559925556183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,16384,0.30636799335479736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,8192,0.16121600568294525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,5120,0.10611200332641602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,10240,0.19756799936294556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,4096,0.08742400258779526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2560,0.060447998344898224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3072,0.06911999732255936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1536,0.04169600084424019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3584,0.07859200239181519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2048,0.051263999193906784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1024,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,256,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,512,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,128,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,10240,0.19756799936294556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,8192,0.16121600568294525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,16384,0.3060159981250763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,12288,0.23347200453281403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,5120,0.10627199709415436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,65536,1.1798399686813354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,65536,1.7654080390930176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3584,0.07875200361013412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,4096,0.08694399893283844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,7168,0.14233599603176117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1024,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,512,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3072,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1536,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2048,0.050912000238895416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2560,0.06054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,256,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,128,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,65536,1.4891200065612793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,7168,0.100832000374794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,8192,0.11222399771213531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,12288,0.15769599378108978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,10240,0.13395200669765472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,65536,1.1791679859161377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3584,0.06003199890255928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,16384,0.20108799636363983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,4096,0.06537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,5120,0.07884799689054489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3072,0.05567999929189682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2048,0.040800001472234726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1536,0.03388800099492073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2560,0.047839999198913574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1024,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,512,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,128,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,256,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,8192,0.0854400023818016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,7168,0.0764480009675026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,10240,0.10367999970912933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,5120,0.05833600088953972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,12288,0.12150400131940842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3584,0.044096000492572784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3072,0.03968000039458275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,4096,0.04806400090456009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2560,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,16384,0.15811200439929962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2048,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1024,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,256,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,512,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1536,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,128,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,7168,0.07542400062084198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,8192,0.08524800091981888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,12288,0.12240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,10240,0.10367999970912933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,16384,0.15775999426841736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,4096,0.048448000103235245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,5120,0.057472001761198044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3072,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2560,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3584,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,65536,0.7293440103530884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1536,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,512,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2048,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1024,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,256,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,128,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,12288,0.12044800072908401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,8192,0.08531200140714645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,10240,0.10316800326108932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,7168,0.07609599828720093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,65536,0.5947200059890747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,16384,0.15379199385643005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,5120,0.05772799998521805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,4096,0.04825599864125252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,65536,0.5936319828033447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3584,0.04335999861359596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3072,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2560,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2048,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1536,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,256,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,512,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1024,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,128,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,5120,0.05321599915623665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,12288,0.10367999970912933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,8192,0.07407999783754349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,7168,0.06800000369548798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,10240,0.08870399743318558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3584,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,4096,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,16384,0.13036799430847168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2048,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3072,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2560,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,512,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1536,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1024,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,256,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,128,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,12288,0.0682239979505539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,7168,0.044096000492572784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,10240,0.05897599831223488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,8192,0.04918399825692177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,65536,0.5590720176696777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,16384,0.08697599917650223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,5120,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3584,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,4096,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3072,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2560,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,65536,0.464352011680603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2048,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1536,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,512,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,256,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1024,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,128,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,7168,0.044064000248909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,5120,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,12288,0.0652799978852272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,8192,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,10240,0.05843200162053108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,16384,0.07820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3584,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,4096,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2560,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2048,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3072,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1536,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,512,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1024,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,65536,0.30851200222969055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,10240,0.046879999339580536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,12288,0.052960000932216644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,7168,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,8192,0.040608000010252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,16384,0.062144000083208084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,4096,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2560,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,5120,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3072,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3584,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2048,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1536,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1024,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,512,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,65536,0.23862400650978088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,7168,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,12288,0.039103999733924866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,8192,0.033440001308918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,10240,0.03619199991226196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,16384,0.0451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,5120,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3584,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3072,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,4096,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2560,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2048,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1536,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,128,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,65536,0.18272000551223755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,7168,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,5120,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,8192,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,10240,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,12288,0.03248000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,16384,0.03683200106024742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3584,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,4096,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2560,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2048,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1536,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,65536,0.11363200098276138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,12288,0.02864000014960766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,7168,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,5120,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,8192,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,10240,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,16384,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3584,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3072,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1536,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1024,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,65536,0.07977599650621414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,8192,0.9160640239715576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,10240,1.1391359567642212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,65536,0.06540799885988235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,4096,0.48767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,5120,0.5909119844436646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3072,0.3723840117454529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3584,0.42950400710105896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2560,0.3201279938220978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,7168,0.8089280128479004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2048,0.2648639976978302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1536,0.2062399983406067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,512,0.08982399851083755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,128,0.045184001326560974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,256,0.05990400165319443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1024,0.3388479948043823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,12288,0.3455680012702942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,16384,0.4540160000324249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,5120,0.1539520025253296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,8192,0.23923200368881226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,12288,1.352735996246338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,7168,0.21119999885559082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,10240,0.2908799946308136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,4096,0.1273919939994812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1536,0.131071999669075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2048,0.07289600372314453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2560,0.08671999722719193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3584,0.11343999952077866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3072,0.0995199978351593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,256,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1024,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,128,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,512,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,7168,0.16636799275875092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,16384,1.7897919416427612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,8192,0.18716800212860107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,10240,0.2261440008878708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3584,0.09446399658918381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,5120,0.12531200051307678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3072,0.08556800335645676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,12288,0.26790401339530945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,4096,0.10566399991512299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2048,0.06454399973154068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1536,0.05427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,16384,0.34508800506591797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1024,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2560,0.07500799745321274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,512,0.025919999927282333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,256,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,128,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,12288,0.23395200073719025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,8192,0.16070400178432465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,7168,0.24588799476623535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,5120,0.17791999876499176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,10240,0.19855999946594238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,4096,0.2080959975719452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3072,0.17558400332927704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,65536,1.2957119941711426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,16384,0.3072640001773834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2560,0.14473600685596466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3584,0.1977279931306839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2048,0.051711998879909515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1024,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1536,0.04095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,512,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,256,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,128,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,7168,0.1178240031003952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,8192,0.1318719983100891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,12288,0.18403199315071106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,10240,0.1573760062456131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,4096,0.0777600035071373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,5120,0.09107200056314468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3072,0.06387200206518173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,16384,0.2372799962759018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3584,0.07129599899053574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2560,0.05644800141453743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1536,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1024,0.026079999282956123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2048,0.09356799721717834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,512,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,128,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,256,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,65536,1.7644799947738647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,7168,0.10630399733781815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,8192,0.11903999745845795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,10240,0.1408960074186325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,16384,0.2096959948539734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,12288,0.16473600268363953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3584,0.06364800035953522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,5120,0.08182399719953537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,4096,0.07091200351715088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2560,0.052480001002550125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3072,0.05894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1536,0.06774400174617767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2048,0.07827199995517731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1024,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,512,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,256,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,128,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,65536,1.1776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,12288,0.12095999717712402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,8192,0.08489599823951721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,10240,0.17734399437904358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,7168,0.12867200374603271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,5120,0.05852799862623215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,4096,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3584,0.04403200000524521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,16384,0.27456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,65536,0.8708480000495911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3072,0.08844800293445587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1024,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2048,0.06195199862122536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2560,0.07686399668455124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1536,0.05084799975156784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,512,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,128,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,7168,0.07603199779987335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,12288,0.12156800180673599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,10240,0.10284800082445145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,8192,0.0846719965338707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,16384,0.15785600244998932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,5120,0.05737600103020668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,4096,0.048608001321554184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3072,0.04028800129890442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3584,0.04342399910092354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,65536,0.7670720219612122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2560,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2048,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,512,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1536,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1024,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,256,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,128,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,12288,0.12099199742078781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,8192,0.08470399677753448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,7168,0.07622399926185608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,65536,0.5936639904975891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,10240,0.10566399991512299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,4096,0.04854400083422661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,16384,0.15807999670505524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3072,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3584,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,5120,0.05798399820923805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2560,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2048,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1024,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,512,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,256,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1536,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,128,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,65536,0.5944640040397644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,7168,0.07539200037717819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,12288,0.09465599805116653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,8192,0.084927998483181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,16384,0.12015999853610992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,10240,0.08185599744319916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2560,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,5120,0.057760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2048,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3584,0.04355200007557869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,4096,0.04854400083422661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3072,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1536,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1024,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,256,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,512,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,128,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,12288,0.08124800026416779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,7168,0.05145600065588951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,10240,0.06867200136184692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,65536,0.5939199924468994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,8192,0.05782400071620941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,16384,0.10406400263309479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,4096,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,5120,0.04022400081157684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3072,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2560,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2048,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3584,0.03136000037193298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1024,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,512,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,256,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1536,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,65536,0.42188799381256104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,12288,0.12051200121641159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,10240,0.058880001306533813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,7168,0.04399999976158142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,8192,0.04864000156521797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,16384,0.16988800466060638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,5120,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,4096,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3072,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3584,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2560,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2048,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1536,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1024,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,512,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,128,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,65536,0.38182398676872253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,12288,0.05951999872922897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,10240,0.05455999821424484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,7168,0.04339199885725975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,8192,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,5120,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,16384,0.06828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,65536,0.23811200261116028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,4096,0.029023999348282814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3584,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2560,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3072,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2048,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1536,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,512,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1024,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,128,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,12288,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,8192,0.03811199963092804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,5120,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,7168,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,10240,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,16384,0.05411199852824211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,4096,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2048,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3584,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3072,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2560,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1536,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,65536,0.1884160041809082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,512,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,12288,0.03872000053524971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,5120,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,16384,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,10240,0.03606399893760681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,7168,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,8192,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,4096,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3072,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2560,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3584,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1536,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2048,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,65536,0.14243200421333313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,16384,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,12288,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,5120,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,7168,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,8192,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,10240,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,4096,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3584,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,65536,0.0936959981918335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,12288,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,7168,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,8192,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,10240,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,16384,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,4096,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,65536,0.0697920024394989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3072,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2048,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1536,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,4096,0.32175999879837036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,5120,0.3961600065231323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,7168,0.539903998374939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,12288,0.9119679927825928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,10240,0.758080005645752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,65536,0.058240000158548355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3584,0.2852480113506317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1024,0.09747199714183807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,512,0.059007998555898666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1536,0.13523200154304504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2048,0.1757120043039322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,16384,1.1999679803848267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3072,0.2517760097980499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,128,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2560,0.21423999965190887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,256,0.039744000881910324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,8192,0.6128640174865723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,7168,0.14284799993038177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,10240,0.19699199497699738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,5120,0.10691200196743011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,16384,0.30880001187324524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,4096,0.0875839963555336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,8192,0.16035200655460358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3584,0.07843200117349625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,12288,0.23494400084018707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3072,0.06934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2560,0.06035200133919716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2048,0.05004800111055374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1536,0.04012800008058548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1024,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,512,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,128,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,256,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,5120,0.08419200032949448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,12288,0.1831039935350418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,7168,0.1136000007390976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,8192,0.1292479932308197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,10240,0.1589760035276413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3072,0.05471999943256378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,4096,0.06934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3584,0.06220800057053566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2560,0.04841599985957146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1536,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2048,0.0398080013692379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,16384,0.23862400650978088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,512,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1024,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,128,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,256,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,10240,0.13433599472045898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,8192,0.11212799698114395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,7168,0.10182400047779083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,16384,0.20134399831295013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,12288,0.15689599514007568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3584,0.060896001756191254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,4096,0.06812799721956253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,5120,0.07916799932718277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3072,0.05567999929189682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2560,0.048448000103235245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2048,0.04156799986958504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,65536,0.8745599985122681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1536,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1024,0.02627200074493885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,512,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,256,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,128,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,12288,0.12198399752378464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,8192,0.08448000252246857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,10240,0.10400000214576721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,7168,0.07651200145483017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,5120,0.05804799869656563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,16384,0.15862399339675903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,4096,0.04816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3584,0.04416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3072,0.03948799893260002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,65536,1.1778559684753418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2560,0.03548799827694893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1536,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2048,0.029791999608278275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,512,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1024,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,256,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,128,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,12288,0.1207680031657219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,10240,0.1029760017991066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,16384,0.1576640009880066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,8192,0.084927998483181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,7168,0.07599999755620956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,4096,0.04835199937224388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,5120,0.057920001447200775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,65536,0.7295359969139099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3072,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3584,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2560,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1536,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2048,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,256,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1024,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,512,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,128,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,7168,0.06838399916887283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,12288,0.10476800054311752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,16384,0.13142399489879608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,65536,0.5942080020904541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,8192,0.07599999755620956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,10240,0.08895999938249588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,5120,0.05283199995756149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,4096,0.04620800167322159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3584,0.04156799986958504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3072,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2560,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2048,0.02908799983561039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,256,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,512,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1024,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,128,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1536,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,7168,0.04508800059556961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,12288,0.06886400282382965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,10240,0.059487998485565186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,8192,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,65536,0.5954560041427612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,16384,0.0878399983048439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,5120,0.034912001341581345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3584,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,4096,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3072,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2560,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2048,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,512,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1024,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1536,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,65536,0.4663679897785187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,12288,0.06838399916887283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,10240,0.059167999774217606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,8192,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,7168,0.04416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,16384,0.08713600039482117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,65536,0.30847999453544617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,5120,0.0344959981739521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,4096,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3584,0.02703999914228916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3072,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2560,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1536,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2048,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1024,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,512,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,256,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,128,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,12288,0.06470400094985962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,8192,0.04851200059056282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,7168,0.043807998299598694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,5120,0.034912001341581345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,16384,0.07782399654388428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,10240,0.05817599967122078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,4096,0.029184000566601753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3072,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1536,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3584,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2048,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2560,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1024,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,256,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,65536,0.2763200104236603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,12288,0.060896001756191254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,7168,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,8192,0.04835199937224388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,10240,0.0549440011382103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,5120,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,16384,0.07305599749088287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3584,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,4096,0.02879999950528145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2048,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3072,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1536,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2560,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1024,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,65536,0.2393919974565506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,7168,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,8192,0.03859199956059456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,12288,0.05379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,16384,0.06281600147485733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,10240,0.044704001396894455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,65536,0.20975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,4096,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3584,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,5120,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2560,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3072,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2048,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1024,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,12288,0.04543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,5120,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,8192,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,10240,0.040863998234272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,7168,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,16384,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,4096,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3072,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,65536,0.18367999792099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3584,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2560,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2048,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1536,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,256,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,12288,0.0387520007789135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,8192,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,10240,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,5120,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,7168,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,16384,0.045024000108242035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,4096,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3584,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3072,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2560,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2048,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1536,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,65536,0.14217600226402283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,256,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,12288,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,5120,0.026944000273942947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,10240,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,8192,0.028255999088287354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,7168,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,16384,0.036928001791238785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,4096,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3072,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2048,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3584,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2560,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,65536,0.1143999993801117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1536,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,12288,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,5120,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,8192,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,10240,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,7168,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,16384,0.03254399821162224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,4096,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2560,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,65536,0.08144000172615051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3072,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1024,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,12288,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,8192,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,10240,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,7168,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,5120,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,16384,0.028863999992609024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,4096,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,65536,0.06304000318050385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3584,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2560,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,65536,0.053599998354911804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,4096,0.32236799597740173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,5120,0.3954240083694458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,16384,1.4526400566101074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3584,0.2877120077610016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,7168,0.6492800116539001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2048,0.38390401005744934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,8192,0.6165760159492493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3072,0.24937599897384644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2560,0.21465599536895752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1024,0.1881600022315979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,256,0.04790399968624115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,128,0.03136000037193298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,512,0.0944959968328476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1536,0.29523199796676636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,10240,0.948095977306366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,12288,1.1264640092849731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,12288,0.23532800376415253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,5120,0.10684800148010254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,8192,0.16025599837303162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,7168,0.14294399321079254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,10240,0.1977279931306839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,16384,0.3102720081806183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,4096,0.08799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3584,0.07913599908351898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3072,0.06857600063085556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2560,0.05958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1536,0.07142399996519089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2048,0.09734400361776352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,256,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1024,0.04934399947524071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,512,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,128,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,12288,0.2223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,5120,0.1669120043516159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,8192,0.24713599681854248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,10240,0.19731199741363525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,16384,0.2913280129432678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,7168,0.22668799757957458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2560,0.0822720006108284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,4096,0.1334719955921173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3072,0.09958399832248688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2048,0.07334399968385696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3584,0.12272000312805176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1536,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1024,0.03788800165057182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,512,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,256,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,128,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,65536,1.2997440099716187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,12288,0.1931840032339096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,10240,0.15564799308776855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,8192,0.12937599420547485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,7168,0.11423999816179276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,4096,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,5120,0.08886399865150452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3584,0.06851200014352798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,16384,0.24799999594688416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3072,0.059167999774217606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2560,0.07401599735021591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2048,0.06726399809122086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1536,0.049855999648571014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1024,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,256,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,128,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,512,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,7168,0.07651200145483017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,16384,0.15910400450229645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,8192,0.08470399677753448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,12288,0.1218239963054657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,10240,0.10339199751615524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3584,0.0432640016078949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,5120,0.057792000472545624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,4096,0.04879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2560,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3072,0.039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2048,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1536,0.03747199848294258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1024,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,512,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,256,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,128,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,65536,1.1403199434280396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,12288,0.12118399888277054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,8192,0.0846719965338707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,7168,0.07571200281381607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,65536,0.5952000021934509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,10240,0.10342399775981903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,5120,0.05859199911355972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,16384,0.1592639982700348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,4096,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3584,0.04307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3072,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,65536,0.8590080142021179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2560,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2048,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1536,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,512,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,256,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1024,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,128,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,12288,0.08207999914884567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,8192,0.058400001376867294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,7168,0.05161599814891815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,5120,0.03999999910593033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,10240,0.09808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,16384,0.10396800190210342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,4096,0.04694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2048,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3584,0.031007999554276466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3072,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2560,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1536,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,256,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,512,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1024,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,12288,0.1234240010380745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,10240,0.11430399864912033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,8192,0.08476799726486206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,7168,0.07760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,16384,0.16713599860668182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,5120,0.05939200147986412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,65536,0.5988479852676392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3584,0.04467200115323067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,65536,0.38684800267219543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,4096,0.047968000173568726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3072,0.036896001547575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2560,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2048,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,256,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1024,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1536,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,512,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,12288,0.11744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,7168,0.0772479996085167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,5120,0.05721599981188774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,8192,0.08720000088214874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,16384,0.0910400003194809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,10240,0.09689600020647049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,65536,0.3910079896450043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3072,0.03728000074625015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2048,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,4096,0.04758400097489357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3584,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2560,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1536,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1024,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,256,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,512,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,12288,0.07340800017118454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,10240,0.06384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,8192,0.0772159993648529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,7168,0.07199999690055847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,16384,0.08892799913883209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,5120,0.049375999718904495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,4096,0.04262400045990944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3584,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3072,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2048,0.024032000452280045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1536,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,65536,0.34595200419425964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2560,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1024,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,256,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,512,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,128,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,8192,0.06924799829721451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,7168,0.06415999680757523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,5120,0.047680001705884933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,10240,0.06486400216817856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,16384,0.07977599650621414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,12288,0.0655680000782013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,65536,0.3006080090999603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,4096,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3584,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3072,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1536,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2560,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2048,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1024,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,256,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,128,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,12288,0.044415999203920364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,8192,0.037728000432252884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,16384,0.05353600159287453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,10240,0.04495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,5120,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,65536,0.23731200397014618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,7168,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,4096,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3584,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3072,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2560,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2048,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1024,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1536,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,12288,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,7168,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,8192,0.033695999532938004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,5120,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,10240,0.03747199848294258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,16384,0.04713600128889084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,65536,0.21775999665260315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3072,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3584,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,4096,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2560,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2048,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1536,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,512,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,8192,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,5120,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,7168,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,12288,0.037696000188589096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,10240,0.03542400151491165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,16384,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,4096,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3072,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3584,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2048,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1536,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,65536,0.16793599724769592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,12288,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,8192,0.02703999914228916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,7168,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,5120,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,65536,0.09212800115346909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,10240,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,16384,0.03619199991226196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,4096,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,12288,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,10240,0.026079999282956123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,5120,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,8192,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,16384,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,65536,0.06915199756622314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,7168,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1536,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2560,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,128,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,12288,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,8192,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,5120,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,10240,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,7168,0.022655999287962914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,16384,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,65536,0.055424001067876816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3584,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,4096,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2560,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,65536,0.047775998711586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,4096,0.16566400229930878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,5120,0.20396800339221954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,8192,0.31142398715019226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,12288,0.45686399936676025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3072,0.12972800433635712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,10240,0.38678398728370667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3584,0.14800000190734863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2048,0.09334400296211243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2560,0.11155200004577637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,256,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,7168,0.2770560085773468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,512,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1536,0.07353600114583969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,128,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1024,0.05407999828457832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,5120,0.058400001376867294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,12288,0.12303999811410904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,8192,0.08627200126647949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,7168,0.07686399668455124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,10240,0.10342399775981903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3584,0.04390399903059006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3072,0.039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,4096,0.04771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2560,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,16384,0.15824000537395477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1536,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2048,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,256,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1024,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,512,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,128,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,16384,0.6024960279464722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,8192,0.08560000360012054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,7168,0.07580800354480743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,12288,0.12086399644613266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,16384,0.1544319987297058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,10240,0.10393600165843964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,5120,0.05798399820923805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3584,0.04390399903059006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,4096,0.04758400097489357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2560,0.033984001725912094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3072,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2048,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1536,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1024,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,65536,0.5968959927558899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,512,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,128,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,256,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,8192,0.07529599964618683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,5120,0.05385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,12288,0.1034879982471466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,7168,0.06838399916887283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,10240,0.08924800157546997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,16384,0.13104000687599182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3584,0.04137599840760231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3072,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,4096,0.04483199864625931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2048,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2560,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1536,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,65536,0.5583360195159912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,128,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,512,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1024,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,12288,0.0703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,10240,0.058880001306533813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,5120,0.03526400029659271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,7168,0.046271998435258865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,8192,0.04982399940490723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,4096,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,16384,0.08959999680519104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3584,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3072,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2048,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1536,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2560,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,512,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,65536,0.46806401014328003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1024,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,5120,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,8192,0.0498879998922348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,7168,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,12288,0.06880000233650208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,10240,0.05878400057554245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,16384,0.08726400136947632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2048,0.019200000911951065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2560,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3072,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3584,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,4096,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1536,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,128,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1024,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,12288,0.062111999839544296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,65536,0.3081600069999695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,10240,0.05443200096487999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,7168,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,8192,0.04822399839758873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,16384,0.07308799773454666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,4096,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,5120,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,65536,0.2749119997024536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3584,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3072,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2560,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2048,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1024,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1536,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,12288,0.052960000932216644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,8192,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,10240,0.04601600021123886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,7168,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,16384,0.06207999959588051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,5120,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,4096,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,65536,0.20627200603485107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3584,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3072,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2560,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1024,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2048,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,512,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,128,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,5120,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,7168,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,12288,0.04873599857091904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,8192,0.038495998829603195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,10240,0.04339199885725975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,16384,0.05849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,4096,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3584,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3072,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2560,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2048,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1024,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,65536,0.17520000040531158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,12288,0.044576000422239304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,10240,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,8192,0.03782400116324425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,5120,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,16384,0.05318399891257286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,7168,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,4096,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,65536,0.1597760021686554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3584,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3072,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2560,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1536,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2048,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,12288,0.04217600077390671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,8192,0.03542400151491165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,10240,0.03791999816894531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,5120,0.026944000273942947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,7168,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,16384,0.04995200037956238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,65536,0.14441600441932678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3584,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,4096,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2560,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3072,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1024,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,12288,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,8192,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,7168,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,5120,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,10240,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,16384,0.044319998472929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,4096,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2560,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3584,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2048,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3072,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1536,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,65536,0.12931199371814728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,12288,0.037856001406908035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,7168,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,8192,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,10240,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,5120,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,65536,0.11327999830245972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,4096,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,16384,0.04227200150489807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2560,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3072,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1536,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3584,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,12288,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,8192,0.027904000133275986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,10240,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,7168,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,5120,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,16384,0.03641600161790848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,4096,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,65536,0.09318400174379349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3584,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2048,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,12288,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,8192,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,7168,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,10240,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,16384,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,4096,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,65536,0.07916799932718277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2560,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2048,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,12288,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,7168,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,10240,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,16384,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,5120,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,8192,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,65536,0.06195199862122536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,4096,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3072,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2560,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,12288,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,8192,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,16384,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,7168,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,5120,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,10240,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,65536,0.05305600166320801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,4096,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3584,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2560,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3072,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,65536,0.048287998884916306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,4096,0.16790400445461273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,5120,0.20342400670051575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,7168,0.27740800380706787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,10240,0.4326399862766266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3072,0.1303360015153885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2560,0.11126399785280228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3584,0.14873600006103516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2048,0.09328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,512,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1536,0.1332480013370514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,256,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1024,0.08892799913883209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,8192,0.3498240113258362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,128,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,12288,0.12163200229406357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,12288,0.5041279792785645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,10240,0.10342399775981903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,8192,0.08448000252246857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,7168,0.07747200131416321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,5120,0.0586559996008873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,4096,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3584,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3072,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2560,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,16384,0.15785600244998932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2048,0.030592000111937523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1024,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1536,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,512,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,256,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,16384,0.6785280108451843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,128,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,12288,0.13497599959373474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,5120,0.05734400078654289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,7168,0.0753600001335144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,8192,0.08479999750852585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,10240,0.10534399747848511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,4096,0.048287998884916306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,16384,0.171424001455307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3584,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3072,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2560,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2048,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,65536,0.6809279918670654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1024,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1536,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,512,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,256,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,128,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,12288,0.11459200084209442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,5120,0.054336000233888626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,7168,0.06867200136184692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,10240,0.0960640013217926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,16384,0.13555200397968292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,8192,0.08070400357246399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,4096,0.04742399975657463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2048,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,65536,0.5771840214729309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3584,0.04508800059556961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2560,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3072,0.04054399952292442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1536,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1024,0.021663999184966087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,512,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,256,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,128,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,12288,0.11686400324106216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,10240,0.09804800152778625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,8192,0.0785600021481514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,5120,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,16384,0.15452800691127777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,7168,0.07119999825954437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,4096,0.04447999969124794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3072,0.03657599911093712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3584,0.03968000039458275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2560,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1536,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2048,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,65536,0.5608639717102051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,512,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1024,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,256,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,128,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,12288,0.10857599973678589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,10240,0.09094399958848953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,5120,0.049375999718904495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,7168,0.06883200258016586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,8192,0.07731200009584427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,4096,0.04131200164556503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,16384,0.10073599964380264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2560,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3072,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3584,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2048,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,512,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1536,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,128,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1024,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,8192,0.06672000139951706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,12288,0.06729599833488464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,10240,0.05392000079154968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,65536,0.3538239896297455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,7168,0.05724800005555153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,16384,0.07222399860620499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,65536,0.27497598528862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3584,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,4096,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2560,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,5120,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3072,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1536,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2048,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1024,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,128,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,12288,0.05427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,10240,0.04633599892258644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,7168,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,16384,0.06409599632024765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,5120,0.03542400151491165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,8192,0.04934399947524071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,65536,0.20585599541664124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,4096,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3584,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2560,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1536,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2048,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3072,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1024,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,512,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,12288,0.049695998430252075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,8192,0.04972799867391586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,10240,0.045471999794244766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,7168,0.04169600084424019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,5120,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,16384,0.061216000467538834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,65536,0.17500799894332886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,4096,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2560,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3584,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2048,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3072,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1536,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,512,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,12288,0.048767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,7168,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,5120,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,10240,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,8192,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,16384,0.06060799956321716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,4096,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3584,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,65536,0.1574079990386963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2560,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2048,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3072,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1024,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,12288,0.04281599819660187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,8192,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,5120,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,7168,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,10240,0.04095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,16384,0.04992000013589859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,65536,0.143327996134758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,4096,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2560,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3584,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3072,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2048,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1024,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1536,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,128,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,512,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,12288,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,8192,0.033055998384952545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,7168,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,10240,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,5120,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,16384,0.04745600000023842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,4096,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,65536,0.12851199507713318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3584,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1536,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3072,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2560,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,12288,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,8192,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,10240,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,7168,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,5120,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,16384,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,65536,0.12569600343704224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,4096,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3072,0.019200000911951065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1536,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2560,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3584,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2048,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1024,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,12288,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,16384,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,8192,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,4096,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,5120,0.02518399991095066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,10240,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,7168,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,65536,0.1090560033917427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3584,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1536,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2048,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,12288,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,8192,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,10240,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,7168,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,5120,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,16384,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,65536,0.07859200239181519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,4096,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3072,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,12288,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,10240,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,7168,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,65536,0.05593600124120712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,8192,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,5120,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,16384,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3584,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3072,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,4096,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1024,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,512,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,12288,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,8192,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,7168,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,5120,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,16384,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,10240,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,4096,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,65536,0.047168001532554626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2560,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1024,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,65536,0.04729599878191948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,4096,0.2234559953212738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,5120,0.2848320007324219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,7168,0.389055997133255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3584,0.20112000405788422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,8192,0.4431999921798706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3072,0.17046399414539337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,12288,0.6480640172958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2560,0.14217600226402283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2048,0.10188800096511841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1024,0.10387200117111206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,512,0.05209600180387497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,256,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1536,0.15081599354743958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,128,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,16384,0.6684799790382385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,12288,0.16313600540161133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,10240,0.5659520030021667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,8192,0.09414400160312653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,7168,0.08963199704885483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,5120,0.07171200215816498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,4096,0.04960000142455101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,10240,0.13247999548912048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3584,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2560,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3072,0.04054399952292442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1536,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,16384,0.2261119931936264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,512,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2048,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1024,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,128,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,256,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,5120,0.05913599953055382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,12288,0.13551999628543854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,8192,0.08550400286912918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,7168,0.08425600081682205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,10240,0.12172800302505493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,16384,0.18479999899864197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,4096,0.049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3584,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3072,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2560,0.03590400144457817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2048,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,65536,0.8825280070304871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1536,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,128,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,512,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1024,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,256,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,5120,0.0639680027961731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,7168,0.07891199737787247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,10240,0.09775999933481216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,8192,0.08844800293445587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,16384,0.14300799369812012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,12288,0.12105599790811539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3072,0.039712000638246536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,4096,0.054336000233888626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3584,0.04729599878191948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2560,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2048,0.03017600066959858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1536,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1024,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,128,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,512,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,256,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,12288,0.13711999356746674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,7168,0.0854400023818016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,10240,0.11568000167608261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,16384,0.18780800700187683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,8192,0.0883840024471283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,5120,0.06067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,4096,0.05036799982190132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3584,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2560,0.03407999873161316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3072,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,65536,0.6838399767875671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,65536,0.59334397315979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2048,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1536,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,128,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1024,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,256,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,512,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,12288,0.12323199957609177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,5120,0.059167999774217606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,10240,0.11324799805879593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,7168,0.07932800054550171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,8192,0.08857599645853043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,65536,0.3931199908256531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,16384,0.10886400192975998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,4096,0.05071999877691269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3072,0.03824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3584,0.04623999819159508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2560,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2048,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1536,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1024,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,256,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,512,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,128,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,12288,0.07072000205516815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,7168,0.07017599791288376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,10240,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,16384,0.07257600128650665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,8192,0.0791039988398552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,4096,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,5120,0.054976001381874084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3072,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2048,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1536,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2560,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3584,0.040608000010252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,512,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1024,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,256,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,128,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,65536,0.2759360074996948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,12288,0.05417599901556969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,8192,0.05958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,7168,0.05158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,10240,0.05369599908590317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,65536,0.20643199980258942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,4096,0.03244800120592117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3584,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,5120,0.040192000567913055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3072,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,16384,0.0708480030298233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2560,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1536,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2048,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1024,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,256,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,512,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,128,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,12288,0.05244800075888634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,8192,0.057151999324560165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,10240,0.0488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,7168,0.052191998809576035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,16384,0.06159999966621399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,65536,0.17500799894332886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,5120,0.038784001022577286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3584,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,4096,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3072,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2048,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2560,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1536,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1024,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,512,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,128,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,7168,0.0498879998922348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,8192,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,5120,0.03667199984192848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,12288,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,16384,0.06019200012087822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,10240,0.043296001851558685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3584,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,65536,0.15852800011634827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,4096,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3072,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2560,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2048,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1536,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1024,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,512,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,12288,0.04447999969124794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,8192,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,10240,0.0424639992415905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,5120,0.03270399942994118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,7168,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,16384,0.05619199946522713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,65536,0.14470399916172028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,4096,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3584,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2560,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1536,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3072,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2048,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1024,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,512,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,256,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,12288,0.03888000175356865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,10240,0.036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,7168,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,8192,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,16384,0.04575999826192856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,5120,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,65536,0.1281599998474121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3584,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2048,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,4096,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3072,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2560,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1024,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,128,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,12288,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,16384,0.042399998754262924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,7168,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,8192,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,10240,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,5120,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,4096,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3072,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3584,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,65536,0.1313920021057129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2560,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2048,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1024,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,12288,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,5120,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,8192,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,10240,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,7168,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,65536,0.11343999952077866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,16384,0.0350399985909462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,4096,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3072,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2560,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3584,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1024,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,12288,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,7168,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,5120,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,8192,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,16384,0.028863999992609024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,10240,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,65536,0.07744000107049942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,4096,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3584,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3072,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1024,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2560,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1536,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2048,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,256,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,12288,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,7168,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,10240,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,5120,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,16384,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,8192,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,65536,0.055615998804569244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3584,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3072,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1536,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2048,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,12288,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,10240,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,5120,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,16384,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,8192,0.02703999914228916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,7168,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,4096,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,65536,0.04896000027656555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2560,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2048,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,65536,0.052319999784231186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,4096,0.10838399827480316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,5120,0.13340799510478973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,7168,0.17830400168895721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,8192,0.20297600328922272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3584,0.09612800180912018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,10240,0.25046399235725403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3072,0.08511999994516373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2560,0.07411199808120728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1536,0.05084799975156784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2048,0.06204799935221672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,128,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,256,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,512,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1024,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,16384,0.38819199800491333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,12288,0.2951039969921112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,12288,0.08243200182914734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,5120,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,10240,0.07011199742555618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,7168,0.05100800096988678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,8192,0.05801599845290184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,16384,0.10463999956846237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3584,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2560,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,4096,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3072,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2048,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1536,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,512,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1024,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,256,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,10240,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,8192,0.057920001447200775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,5120,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,7168,0.05104000121355057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,16384,0.10406400263309479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,12288,0.081727996468544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2560,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,4096,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3584,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3072,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1536,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2048,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,512,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1024,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,128,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,256,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,12288,0.08118399977684021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,10240,0.06918399780988693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,16384,0.1037760004401207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,7168,0.05158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,8192,0.057312000542879105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,5120,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,4096,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3072,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3584,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2560,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2048,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,65536,0.3834240138530731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1536,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1024,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,512,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,256,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,128,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,12288,0.052319999784231186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,65536,0.3818559944629669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,7168,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,8192,0.039103999733924866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,10240,0.04678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,16384,0.06307200342416763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,5120,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,65536,0.37753599882125854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,4096,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3584,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3072,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2048,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2560,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,512,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1536,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1024,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,12288,0.04899200052022934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,5120,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,7168,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,8192,0.0379519984126091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,10240,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,16384,0.05801599845290184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,4096,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,65536,0.20044800639152527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2560,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3072,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3584,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2048,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1536,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,12288,0.042527999728918076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,16384,0.050175998359918594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,8192,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,7168,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,5120,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,10240,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,4096,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3584,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,65536,0.1791359931230545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3072,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2560,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2048,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1536,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1024,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,12288,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,5120,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,8192,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,7168,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,16384,0.04560000076889992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,10240,0.03577600046992302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3072,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,65536,0.1377280056476593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2560,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,4096,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3584,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2048,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,512,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,12288,0.03929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,16384,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,8192,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,5120,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,10240,0.03699199855327606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,7168,0.0307839997112751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,4096,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,65536,0.11452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3072,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2048,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3584,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1536,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2560,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1024,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,12288,0.037728000432252884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,5120,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,10240,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,8192,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,7168,0.028896000236272812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,16384,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,65536,0.10771200060844421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3584,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2048,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3072,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,4096,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2560,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1024,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1536,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,12288,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,10240,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,5120,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,8192,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,7168,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,16384,0.0414079986512661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,4096,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3584,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2048,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,65536,0.09424000233411789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,12288,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,10240,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,7168,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,8192,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,5120,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,16384,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,4096,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,65536,0.08656000345945358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3584,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3072,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1024,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,12288,0.032607998698949814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,5120,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,8192,0.02723200060427189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,10240,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,7168,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,65536,0.08240000158548355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,16384,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,4096,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3584,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1536,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,12288,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,7168,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,8192,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,5120,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,10240,0.026176000013947487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,16384,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,65536,0.07094399631023407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,4096,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,12288,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,8192,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,5120,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,7168,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,65536,0.06387200206518173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,16384,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,4096,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,10240,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3584,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,12288,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,8192,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,16384,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,7168,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,10240,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,65536,0.051872000098228455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,4096,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3584,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3072,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,12288,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,65536,0.048287998884916306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,10240,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,5120,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,7168,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,8192,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,16384,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,4096,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3584,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,65536,0.04224000126123428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,4096,0.11062400043010712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,8192,0.2062080055475235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,12288,0.29795199632644653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3584,0.09939199686050415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,7168,0.1789119988679886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,10240,0.24934400618076324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,5120,0.13500800728797913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3072,0.08582399785518646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,256,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,128,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2560,0.07523199915885925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2048,0.06470400094985962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,512,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1024,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1536,0.053568001836538315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,5120,0.040863998234272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,12288,0.08134400099515915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,7168,0.05135999992489815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,10240,0.06985600292682648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,8192,0.06028800085186958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,16384,0.10553599894046783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,4096,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2560,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3072,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3584,0.03244800120592117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2048,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1536,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1024,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,16384,0.38764798641204834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,512,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,256,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,128,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,12288,0.08092799782752991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,10240,0.06886400282382965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,8192,0.057023998349905014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,5120,0.040192000567913055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,16384,0.10396800190210342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,7168,0.050783999264240265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,4096,0.033984001725912094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3584,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3072,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2560,0.026176000013947487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2048,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1536,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1024,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,512,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,256,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,128,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,65536,0.3813759982585907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,12288,0.0801599994301796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,8192,0.05788800120353699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,7168,0.05164799839258194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,10240,0.0689919963479042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,16384,0.10447999835014343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,5120,0.04016000032424927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,4096,0.033695999532938004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3584,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3072,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2560,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2048,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1536,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,65536,0.3843519985675812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,512,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1024,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,256,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,128,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,8192,0.050335999578237534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,10240,0.05689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,5120,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,7168,0.0416640006005764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,12288,0.05110400170087814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,16384,0.06384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3584,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3072,0.023231999948620796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,4096,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2560,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2048,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,65536,0.37619200348854065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1536,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1024,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,512,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,128,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,12288,0.04899200052022934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,5120,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,8192,0.04428799822926521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,7168,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,10240,0.043455999344587326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,16384,0.0597120001912117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,4096,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3072,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2048,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2560,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1536,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3584,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,65536,0.1993280053138733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1024,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,256,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,128,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,8192,0.03593600168824196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,7168,0.03280000016093254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,12288,0.04259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,10240,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,5120,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,16384,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,4096,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3584,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2048,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2560,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3072,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,65536,0.17907199263572693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1536,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,128,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,512,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1024,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,12288,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,5120,0.026815999299287796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,7168,0.030848000198602676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,8192,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,10240,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,16384,0.04521600157022476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,65536,0.13497599959373474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3584,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3072,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2560,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,4096,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2048,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1536,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1024,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,12288,0.039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,5120,0.028031999245285988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,16384,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,8192,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,7168,0.0318400003015995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,10240,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,65536,0.11372800171375275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3072,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,4096,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2560,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3584,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1024,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,256,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,512,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,12288,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,7168,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,8192,0.03203200176358223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,16384,0.043296001851558685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,10240,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,5120,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,4096,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,65536,0.10492800176143646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3072,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3584,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2560,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1024,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1536,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2048,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,512,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,12288,0.033984001725912094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,5120,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,10240,0.03200000151991844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,8192,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,7168,0.02844800055027008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,65536,0.09516800194978714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,16384,0.0414079986512661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3584,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,4096,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3072,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1536,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2048,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1024,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2560,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,256,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,12288,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,10240,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,16384,0.03711999952793121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,8192,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,5120,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,4096,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,7168,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3584,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3072,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2560,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,65536,0.08460800349712372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1536,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2048,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1024,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,512,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,128,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,12288,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,5120,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,65536,0.0791039988398552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,10240,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,7168,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,16384,0.03561599925160408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,8192,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3584,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2560,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1536,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1024,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2048,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,12288,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,10240,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,5120,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,7168,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,16384,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,8192,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,65536,0.07065600156784058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3072,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2560,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3584,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1024,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1536,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,12288,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,16384,0.029184000566601753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,10240,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,7168,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,65536,0.06387200206518173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,5120,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,8192,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,4096,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2560,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3584,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,512,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3072,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,128,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,12288,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,8192,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,10240,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,65536,0.04729599878191948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,5120,0.02332800067961216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,16384,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,7168,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,4096,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3072,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,512,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,12288,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,8192,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,65536,0.04896000027656555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,10240,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,16384,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,7168,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,5120,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2560,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,65536,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,4096,0.11664000153541565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,7168,0.1881600022315979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,8192,0.2146880030632019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3584,0.10534399747848511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,12288,0.30556800961494446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,5120,0.13887999951839447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3072,0.08959999680519104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,10240,0.26105600595474243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2560,0.07820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,512,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,256,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1536,0.05427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1024,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2048,0.066880002617836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,16384,0.40108799934387207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,128,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,7168,0.05603199824690819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,5120,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,4096,0.03766399994492531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,8192,0.0623680017888546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,10240,0.07561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,12288,0.08659200370311737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,16384,0.11388800293207169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1536,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2560,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1024,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3072,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2048,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,512,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3584,0.03404799848794937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,256,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,128,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,12288,0.08079999685287476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,8192,0.057920001447200775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,7168,0.051392000168561935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,5120,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,16384,0.1043199971318245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,10240,0.06800000369548798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,4096,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3584,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3072,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1536,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2048,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2560,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1024,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,256,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,512,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,65536,0.41363200545310974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,12288,0.07980799674987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,8192,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,7168,0.05119999870657921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,10240,0.06867200136184692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,16384,0.10454399883747101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,5120,0.03977600112557411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,4096,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3584,0.03126399964094162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2560,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3072,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2048,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,65536,0.3829120099544525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1536,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,256,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,512,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1024,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,128,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,12288,0.05289600044488907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,8192,0.054816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,7168,0.0496320016682148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,16384,0.06284800171852112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,5120,0.037567999213933945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,10240,0.04761600121855736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,4096,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,65536,0.38438400626182556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3584,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1536,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3072,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1024,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2560,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2048,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,128,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,12288,0.04822399839758873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,8192,0.051711998879909515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,7168,0.0453759990632534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,10240,0.045471999794244766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,16384,0.06054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,5120,0.03385600075125694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,4096,0.02908799983561039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3072,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2560,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2048,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3584,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1024,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1536,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,65536,0.19971199333667755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,128,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,256,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,12288,0.04185599833726883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,7168,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,10240,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,8192,0.036639999598264694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,16384,0.050016000866889954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,5120,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,4096,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3584,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2560,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,65536,0.1783680021762848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2048,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1536,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3072,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,512,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1024,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,128,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,12288,0.03824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,10240,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,5120,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,7168,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,65536,0.13568000495433807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,16384,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,8192,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,4096,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3584,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2560,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2048,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3072,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1536,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1024,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,512,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,256,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,128,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,12288,0.039423998445272446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,7168,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,5120,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,10240,0.03862399980425835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,8192,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,16384,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,4096,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3584,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3072,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2560,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,65536,0.11420799791812897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2048,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1536,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,512,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,256,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1024,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,12288,0.036896001547575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,8192,0.0318400003015995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,16384,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,10240,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,5120,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,7168,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,4096,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,65536,0.10355199873447418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3584,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2560,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2048,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3072,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,256,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1024,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,12288,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,5120,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,10240,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,65536,0.09459199756383896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,8192,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,7168,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,16384,0.04131200164556503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,4096,0.023231999948620796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3072,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2048,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3584,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2560,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1536,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,512,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,128,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,12288,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,16384,0.037696000188589096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,10240,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,5120,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,7168,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,8192,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,4096,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,65536,0.0862400010228157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3584,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2560,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2048,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3072,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1536,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,12288,0.03136000037193298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,7168,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,5120,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,10240,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,8192,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,16384,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,65536,0.07788799703121185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3584,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3072,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2560,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1536,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1024,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,128,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,12288,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,65536,0.0703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,5120,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,10240,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,8192,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,16384,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,7168,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,4096,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2560,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3072,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1536,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,512,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,12288,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,16384,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,8192,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,7168,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,10240,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,4096,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,65536,0.06259199976921082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3072,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3584,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,512,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,128,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,12288,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,65536,0.047040000557899475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,7168,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,16384,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,10240,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,8192,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,5120,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,4096,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3584,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2560,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1024,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1536,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2048,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,12288,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,10240,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,7168,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,5120,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,65536,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,16384,0.030208000913262367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,8192,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1536,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,65536,0.04700800031423569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,4096,0.08895999938249588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,5120,0.10675200074911118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,7168,0.14521600306034088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3584,0.07983999699354172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,12288,0.24003200232982635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,8192,0.166143998503685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,10240,0.20323200523853302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3072,0.07068800181150436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2560,0.06006399914622307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1536,0.04262400045990944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,256,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2048,0.05075199902057648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1024,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,512,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,128,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,16384,0.31327998638153076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,5120,0.033440001308918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,8192,0.04806400090456009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,7168,0.04307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,4096,0.028896000236272812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,12288,0.0682239979505539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,10240,0.0586559996008873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3072,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3584,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2560,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,16384,0.08716800063848495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2048,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1536,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1024,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,512,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,256,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,12288,0.06761600077152252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,5120,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,8192,0.04710400104522705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,7168,0.04259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,10240,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,16384,0.0862400010228157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3072,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,4096,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2560,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2048,0.019200000911951065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3584,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1536,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1024,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,256,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,512,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,65536,0.3128640055656433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,12288,0.06656000018119812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,10240,0.057472001761198044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,8192,0.047359999269247055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,7168,0.042080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,5120,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,4096,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3584,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,16384,0.08569599688053131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3072,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2560,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,65536,0.3105599880218506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2048,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1536,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1024,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,128,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,256,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,12288,0.04310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,10240,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,8192,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,5120,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,7168,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,16384,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,4096,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3584,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3072,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2560,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2048,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1536,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,65536,0.3105599880218506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,12288,0.0416640006005764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,8192,0.033952001482248306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,7168,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,5120,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,16384,0.05036799982190132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,10240,0.037567999213933945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3584,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,4096,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3072,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,65536,0.16457599401474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2048,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2560,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,12288,0.03702399879693985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,7168,0.028896000236272812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,10240,0.03248000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,5120,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,8192,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,16384,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3584,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2048,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3072,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,4096,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1536,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,65536,0.14697599411010742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,12288,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,7168,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,10240,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,8192,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,5120,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,16384,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,65536,0.11184000223875046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3584,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2560,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3072,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1536,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,256,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,12288,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,10240,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,5120,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,7168,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,8192,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,16384,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,4096,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,65536,0.09382399916648865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3072,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3584,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1536,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,12288,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,10240,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,5120,0.022336000576615334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,7168,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,8192,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,65536,0.08710400015115738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,16384,0.035999998450279236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3072,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2048,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,12288,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,8192,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,7168,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,5120,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,16384,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,10240,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,65536,0.07782399654388428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2560,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,12288,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,5120,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,7168,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,10240,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,16384,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,8192,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,4096,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,65536,0.07203199714422226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,12288,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,8192,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,5120,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,10240,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,7168,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,16384,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,65536,0.06268800050020218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,4096,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,12288,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,7168,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,5120,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,8192,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,10240,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,16384,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,65536,0.05833600088953972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,4096,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3584,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2560,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,12288,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,8192,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,10240,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,7168,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,65536,0.05251200124621391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,5120,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,16384,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3584,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,4096,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1536,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2048,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3072,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1024,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,12288,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,10240,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,16384,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,5120,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,8192,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,65536,0.0451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,7168,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,4096,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3584,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2048,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1024,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,12288,0.030848000198602676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,10240,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,8192,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,7168,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,16384,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,65536,0.04169600084424019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,4096,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2560,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2048,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3072,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3584,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,65536,0.04342399910092354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,4096,0.08972799777984619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,5120,0.10787200182676315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,7168,0.14640000462532043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,8192,0.16406400501728058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3584,0.08006399869918823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,10240,0.2025279998779297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,12288,0.23974399268627167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2560,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3072,0.06988800317049026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1536,0.041471999138593674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2048,0.05212799832224846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1024,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,512,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,256,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,128,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,16384,0.31622400879859924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,5120,0.03376000002026558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,7168,0.04390399903059006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,10240,0.058687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,8192,0.047839999198913574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,4096,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,12288,0.06771200150251389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,16384,0.08723200112581253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3584,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2560,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1536,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,512,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3072,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1024,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2048,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,256,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,12288,0.06703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,5120,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,7168,0.04371200129389763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,8192,0.04707200080156326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,10240,0.056703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,16384,0.0857279971241951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3072,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3584,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,4096,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2560,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1536,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2048,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1024,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,512,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,256,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,65536,0.3131519854068756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,12288,0.06678400188684464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,10240,0.056992001831531525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,7168,0.04303999990224838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,8192,0.04732799902558327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,16384,0.08604799956083298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,4096,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3584,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,5120,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3072,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2560,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2048,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1536,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,65536,0.31068798899650574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1024,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,512,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,12288,0.04419200122356415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,8192,0.035392001271247864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,10240,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,5120,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,16384,0.05283199995756149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,7168,0.033055998384952545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,4096,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3072,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3584,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2048,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2560,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,65536,0.3107199966907501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1536,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,12288,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,8192,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,5120,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,7168,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,10240,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,16384,0.049536000937223434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3584,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3072,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,4096,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2560,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2048,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1536,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,65536,0.16335999965667725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1024,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,12288,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,8192,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,7168,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,5120,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,10240,0.031808000057935715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,16384,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,4096,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3584,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3072,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2560,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,65536,0.14796799421310425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1024,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,12288,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,7168,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,8192,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,10240,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,5120,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,16384,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,65536,0.11273600161075592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3072,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2048,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1536,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,12288,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,10240,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,5120,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,16384,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,7168,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,8192,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,65536,0.0923520028591156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,12288,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,10240,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,8192,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,5120,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,16384,0.03638400137424469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,7168,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,65536,0.08681599795818329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2048,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,12288,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,65536,0.07747200131416321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,10240,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,7168,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,8192,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,5120,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,16384,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3584,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2560,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,12288,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,8192,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,10240,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,7168,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,5120,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,16384,0.03251200169324875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,65536,0.07004799693822861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,4096,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,12288,0.02723200060427189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,16384,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,10240,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,8192,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,7168,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,5120,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,4096,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,65536,0.06272000074386597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3584,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3072,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,12288,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,7168,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,5120,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,8192,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,10240,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,16384,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,4096,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,65536,0.0560000017285347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,512,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,12288,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,8192,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,7168,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,5120,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,10240,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,16384,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,65536,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3072,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1536,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,12288,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,5120,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,8192,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,65536,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,16384,0.03526400029659271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,10240,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,7168,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,4096,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2560,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,12288,0.03404799848794937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,10240,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,65536,0.04265600070357323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,7168,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,8192,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,16384,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,4096,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,5120,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3072,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3584,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2560,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1536,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1024,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,65536,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,5120,0.09692800045013428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,4096,0.08003199845552444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3584,0.06918399780988693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,8192,0.14723199605941772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,7168,0.12908799946308136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,12288,0.21356800198554993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,10240,0.17564800381660461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3072,0.06137600168585777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2560,0.05385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2048,0.045184001326560974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1024,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,256,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,512,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1536,0.03929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,128,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,16384,0.2746239900588989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,4096,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,5120,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,7168,0.041280001401901245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,10240,0.05385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,8192,0.045504000037908554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,12288,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,16384,0.07660800218582153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3584,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3072,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2560,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,512,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1024,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1536,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2048,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,12288,0.05878400057554245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,8192,0.04118400067090988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,5120,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,7168,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,10240,0.050303999334573746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,16384,0.0745600014925003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,4096,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3584,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3072,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2560,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2048,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1536,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1024,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,256,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,12288,0.05728000029921532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,8192,0.04092799872159958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,10240,0.04912000149488449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,7168,0.03779200091958046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,16384,0.07427199929952621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3584,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,65536,0.2704319953918457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,4096,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,5120,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2560,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3072,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2048,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1024,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1536,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,65536,0.2640320062637329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,12288,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,10240,0.04089599847793579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,8192,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,7168,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,16384,0.05206400156021118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,5120,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,65536,0.2635520100593567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3072,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2048,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1536,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,512,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,12288,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,7168,0.030527999624609947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,8192,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,16384,0.04646399989724159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,5120,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,10240,0.03686400130391121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3584,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,65536,0.152319997549057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2560,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2048,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,12288,0.03500799834728241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,10240,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,8192,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,5120,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,7168,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,16384,0.03888000175356865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,4096,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3584,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2560,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3072,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,65536,0.13619199395179749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,12288,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,10240,0.02879999950528145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,7168,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,8192,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,5120,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,16384,0.036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,4096,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,65536,0.10361599922180176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3072,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,12288,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,5120,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,7168,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,65536,0.08697599917650223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,10240,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,8192,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,16384,0.03596799820661545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,4096,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2048,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,12288,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,5120,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,8192,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,7168,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,10240,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,16384,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,65536,0.08051200211048126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,4096,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3072,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2560,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,12288,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,5120,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,8192,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,7168,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,10240,0.025919999927282333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,65536,0.06937599927186966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,16384,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,4096,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3072,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,12288,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,7168,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,8192,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,10240,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,16384,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,5120,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,4096,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3072,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,65536,0.06521599739789963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,12288,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,16384,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,10240,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,8192,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,5120,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,65536,0.05660799890756607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,7168,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3584,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3072,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2048,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,256,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,12288,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,10240,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,16384,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,8192,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,7168,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,5120,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,65536,0.05158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,4096,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3072,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2560,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2048,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3584,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,12288,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,10240,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,5120,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,65536,0.047968000173568726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,7168,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,16384,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,8192,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,4096,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3584,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3072,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,12288,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,65536,0.04153599962592125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,16384,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,10240,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,8192,0.026815999299287796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,7168,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3584,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,4096,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3072,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2048,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,12288,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,10240,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,65536,0.04972799867391586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,8192,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,5120,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,16384,0.04108799993991852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,7168,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,4096,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3584,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2048,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2560,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,65536,0.051072001457214355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,4096,0.0761599987745285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,5120,0.09446399658918381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3584,0.06985600292682648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,7168,0.12601600587368011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,8192,0.14319999516010284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,12288,0.21014399826526642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,10240,0.17628799378871918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2048,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2560,0.052000001072883606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,512,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3072,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1536,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1024,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,256,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,128,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,16384,0.27804800868034363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,12288,0.05999999865889549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,5120,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,7168,0.04156799986958504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,8192,0.047359999269247055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,4096,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,10240,0.05299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,16384,0.07526399940252304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3072,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1536,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3584,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1024,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,512,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2560,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2048,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,12288,0.05366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,5120,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,8192,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,10240,0.04825599864125252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,7168,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,16384,0.06896000355482101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,4096,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3584,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2560,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3072,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1536,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,512,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,65536,0.27670401334762573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,12288,0.05331199988722801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,10240,0.04700800031423569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,8192,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,7168,0.035232000052928925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,16384,0.06771200150251389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,5120,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,4096,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3584,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3072,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2560,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2048,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1536,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,65536,0.24278399348258972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,12288,0.04291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,10240,0.037856001406908035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,8192,0.03203200176358223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,7168,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,5120,0.02739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,16384,0.053888000547885895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2560,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,65536,0.2465600073337555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1536,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1024,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,12288,0.040192000567913055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,8192,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,7168,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,10240,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,5120,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,16384,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,4096,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,65536,0.150751993060112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3584,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3072,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2560,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2048,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,12288,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,7168,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,8192,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,5120,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,10240,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,16384,0.03884800150990486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3584,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3072,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,4096,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,65536,0.13337600231170654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,12288,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,16384,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,8192,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,10240,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,5120,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,7168,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,65536,0.10044799745082855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,4096,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2560,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3584,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,12288,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,10240,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,5120,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,7168,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,8192,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,16384,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,65536,0.08499199897050858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,4096,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3584,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1536,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,256,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,12288,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,8192,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,7168,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,10240,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,5120,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,16384,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,4096,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,65536,0.07731200009584427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,12288,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,8192,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,10240,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,65536,0.06864000111818314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,5120,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,7168,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,16384,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,4096,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,12288,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,16384,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,8192,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,5120,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,10240,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,7168,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,4096,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,65536,0.06403200328350067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3584,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3072,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2560,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,12288,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,8192,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,7168,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,65536,0.057023998349905014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,5120,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,10240,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,16384,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,4096,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3584,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,12288,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,16384,0.033984001725912094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,8192,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,5120,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,7168,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,10240,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,4096,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,65536,0.04947200044989586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3584,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3072,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,512,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,12288,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,5120,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,16384,0.04364800080657005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,7168,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,8192,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,10240,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,65536,0.04668800160288811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,4096,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2560,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3584,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2048,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,12288,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,65536,0.050175998359918594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,10240,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,5120,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,7168,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,8192,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,16384,0.04156799986958504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,4096,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3584,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2560,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3072,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1536,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,12288,0.03548799827694893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,8192,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,5120,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,65536,0.07571200281381607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,7168,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,10240,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,16384,0.04076800122857094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,4096,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3584,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2048,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,65536,0.07660800218582153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,4096,0.07683199644088745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,5120,0.09299200028181076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3584,0.0684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,8192,0.14560000598430634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,7168,0.12486399710178375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,12288,0.20396800339221954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,10240,0.17561599612236023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2560,0.05180799961090088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3072,0.06083200126886368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2048,0.043007999658584595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1536,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1024,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,512,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,256,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,128,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,16384,0.2773439884185791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,5120,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,8192,0.04604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,12288,0.06406400352716446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,7168,0.0416640006005764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,10240,0.0522879995405674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,16384,0.0799039974808693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,4096,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3072,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3584,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2560,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2048,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1536,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1024,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,512,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,12288,0.0541439987719059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,5120,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,7168,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,8192,0.038495998829603195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,10240,0.04825599864125252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,16384,0.06831999868154526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,4096,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3072,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3584,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2048,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2560,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1536,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,256,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1024,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,12288,0.05596800148487091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,65536,0.2847999930381775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,8192,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,10240,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,7168,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,16384,0.06825599819421768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3072,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,5120,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3584,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,4096,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2560,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2048,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1536,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1024,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,512,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,65536,0.2436159998178482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,128,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,12288,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,5120,0.027264000847935677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,8192,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,10240,0.04047999903559685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,16384,0.05097600072622299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,7168,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,65536,0.24352000653743744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3584,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2560,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2048,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,512,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,12288,0.04028800129890442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,8192,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,7168,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,10240,0.037248000502586365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,5120,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,16384,0.04569600149989128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,4096,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3584,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2560,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3072,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,65536,0.14767999947071075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,12288,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,8192,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,7168,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,16384,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,10240,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,4096,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2560,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3584,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3072,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,65536,0.13391999900341034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,12288,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,8192,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,7168,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,5120,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,10240,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,16384,0.03561599925160408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,65536,0.10220800340175629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,4096,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3072,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3584,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1536,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2048,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2560,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,512,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,12288,0.02908799983561039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,5120,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,16384,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,7168,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,8192,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,10240,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,65536,0.08323200047016144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,4096,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3584,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3072,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2560,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1536,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2048,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,12288,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,5120,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,7168,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,10240,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,8192,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,16384,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,65536,0.07791999727487564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3584,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,4096,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3072,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2560,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2048,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1536,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1024,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,12288,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,10240,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,7168,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,5120,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,8192,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,65536,0.07017599791288376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,16384,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,4096,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3072,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1536,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2048,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3584,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2560,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1024,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,512,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,12288,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,5120,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,16384,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,7168,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,10240,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,8192,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,65536,0.06441599875688553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,4096,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2560,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3584,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3072,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1536,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2048,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1024,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,512,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,12288,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,10240,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,7168,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,8192,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,5120,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,16384,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,65536,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,4096,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3072,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2048,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3584,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1024,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2560,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1536,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,512,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,256,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,128,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,12288,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,10240,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,16384,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,5120,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,7168,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,8192,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,65536,0.04960000142455101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,4096,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3072,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3584,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2560,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2048,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1024,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1536,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,512,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,256,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,128,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,12288,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,10240,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,16384,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,8192,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,7168,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,65536,0.045343998819589615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,4096,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,5120,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3584,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3072,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2048,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2560,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1536,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,512,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,128,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,256,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,12288,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,65536,0.053727999329566956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,10240,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,8192,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,16384,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,5120,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,7168,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,4096,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3584,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2560,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3072,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,512,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2048,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1536,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,256,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,128,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,12288,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,8192,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,65536,0.05657599866390228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,7168,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,10240,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,5120,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,16384,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,4096,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3072,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2560,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3584,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2048,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1536,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1024,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,512,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,256,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,128,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,65536,0.05238400027155876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,4096,0.07744000107049942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,5120,0.09433600306510925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,7168,0.12639999389648438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,8192,0.14454400539398193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3584,0.06694400310516357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3072,0.058720000088214874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,10240,0.17475199699401855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2560,0.0517439991235733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,12288,0.20684799551963806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1536,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1024,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2048,0.04467200115323067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,512,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,128,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,256,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,16384,0.2727360129356384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,12288,0.0655359998345375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,7168,0.04150399938225746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,8192,0.04633599892258644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,5120,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,10240,0.05251200124621391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,4096,0.026944000273942947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,16384,0.07977599650621414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2560,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3072,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3584,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1536,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2048,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,512,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1024,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,12288,0.04902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,5120,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,7168,0.03376000002026558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,8192,0.03612799942493439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,10240,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,16384,0.06294400244951248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,4096,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3584,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3072,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2048,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1536,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2560,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,512,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1024,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,12288,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,65536,0.2853440046310425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,8192,0.03187200054526329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,10240,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,7168,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,16384,0.054016001522541046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,5120,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3072,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3584,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,4096,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2560,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1536,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1024,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,65536,0.22032000124454498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,12288,0.03596799820661545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,8192,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,7168,0.02518399991095066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,10240,0.03167999908328056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,16384,0.043807998299598694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,4096,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3584,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2560,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3072,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2048,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,65536,0.19593599438667297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,12288,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,5120,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,10240,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,8192,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,7168,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,16384,0.03977600112557411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,4096,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2560,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3072,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2048,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3584,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1536,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,65536,0.14707200229167938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,12288,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,5120,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,10240,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,16384,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,7168,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,8192,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,4096,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2560,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3072,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2048,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1536,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3584,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,65536,0.1372160017490387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1024,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,12288,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,5120,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,8192,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,16384,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,7168,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,10240,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,4096,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,65536,0.0997759997844696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3072,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3584,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2560,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1536,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2048,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1024,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,256,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,128,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,512,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,12288,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,7168,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,8192,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,5120,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,10240,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,16384,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,4096,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,65536,0.07980799674987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3584,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2560,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1536,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3072,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2048,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1024,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,512,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,256,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,128,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,12288,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,10240,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,8192,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,16384,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,7168,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,5120,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,65536,0.07676800340414047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,4096,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3584,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2560,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3072,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2048,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1536,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1024,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,512,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,256,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,128,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,12288,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,8192,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,5120,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,10240,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,7168,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,16384,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,65536,0.06870400160551071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,4096,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3072,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3584,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2560,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2048,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1536,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,512,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,256,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1024,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,128,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,12288,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,16384,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,5120,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,8192,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,7168,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,10240,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,4096,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,65536,0.06172800064086914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3584,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2560,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3072,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1024,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2048,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,512,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1536,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,256,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,128,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,12288,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,10240,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,8192,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,5120,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,7168,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,16384,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,65536,0.05584000051021576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3584,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,4096,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2560,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3072,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1536,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2048,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1024,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,128,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,512,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,256,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,12288,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,10240,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,16384,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,8192,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,7168,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,5120,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,4096,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,65536,0.04940799996256828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3584,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3072,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2048,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2560,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1536,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1024,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,512,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,256,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,128,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,12288,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,10240,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,5120,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,7168,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,16384,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,65536,0.04476799815893173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,4096,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,8192,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3584,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3072,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2560,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2048,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1536,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,256,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,512,0.006688000168651342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1024,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,128,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,12288,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,16384,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,5120,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,10240,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,8192,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,65536,0.04265600070357323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,7168,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,4096,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3584,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3072,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2048,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2560,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1536,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,512,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1024,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,256,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,128,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,12288,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,8192,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,65536,0.04278400167822838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,16384,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,7168,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,10240,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,5120,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,4096,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2048,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1024,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3072,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2560,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1536,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3584,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,512,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,256,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,128,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,65536,0.04262400045990944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,4096,0.07046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,5120,0.08816000074148178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,7168,0.11692799627780914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,8192,0.13222399353981018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3584,0.06393600255250931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,10240,0.1605760008096695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,12288,0.18812799453735352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3072,0.058368001133203506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2560,0.053119998425245285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,256,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1536,0.035232000052928925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,512,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2048,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,128,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1024,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,16384,0.2471040040254593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,5120,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,8192,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,12288,0.05471999943256378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,7168,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,10240,0.048767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,4096,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,16384,0.07110399752855301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3584,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2048,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3072,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2560,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1536,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1024,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,512,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,12288,0.04185599833726883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,8192,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,5120,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,10240,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,7168,0.027904000133275986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,16384,0.054207999259233475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,4096,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3072,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2560,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,65536,0.25491198897361755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,12288,0.03791999816894531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,8192,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,10240,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,7168,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,16384,0.04771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,4096,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,5120,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3072,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3584,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2560,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2048,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1536,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,65536,0.18851199746131897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,256,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,12288,0.03376000002026558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,5120,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,8192,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,7168,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,10240,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,16384,0.04291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,4096,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3072,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3584,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2560,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2048,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1536,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,65536,0.16521599888801575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1024,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,512,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,12288,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,5120,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,10240,0.028863999992609024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,7168,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,16384,0.03968000039458275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,8192,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,4096,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3072,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2560,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1536,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3584,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2048,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,65536,0.15043200552463531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1024,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,512,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,12288,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,10240,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,16384,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,8192,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,5120,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,7168,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,4096,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3072,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1536,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3584,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,65536,0.1327359974384308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2560,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2048,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1024,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,256,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,512,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,128,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,12288,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,16384,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,5120,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,8192,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,7168,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,10240,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,4096,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3584,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,65536,0.08646400272846222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3072,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2048,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2560,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1536,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1024,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,512,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,256,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,128,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,12288,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,16384,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,5120,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,8192,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,10240,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,7168,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,65536,0.07379200309515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,4096,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3584,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2048,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2560,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3072,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,512,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1536,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1024,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,256,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,128,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,12288,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,8192,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,7168,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,5120,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,16384,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,10240,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,65536,0.06707199662923813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,4096,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3072,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3584,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2048,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2560,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1536,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1024,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,256,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,128,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,512,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,12288,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,10240,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,8192,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,7168,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,16384,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,5120,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,65536,0.061216000467538834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,4096,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3584,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3072,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2560,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2048,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1024,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1536,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,512,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,256,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,128,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,12288,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,8192,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,10240,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,7168,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,5120,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,16384,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,4096,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,65536,0.05580800026655197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2048,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2560,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1536,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3072,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3584,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1024,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,512,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,256,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,128,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,12288,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,10240,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,7168,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,5120,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,8192,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,16384,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,65536,0.05075199902057648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,4096,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3584,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3072,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2048,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2560,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1536,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1024,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,512,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,256,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,128,0.006624000146985054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,12288,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,8192,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,7168,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,10240,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,16384,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,5120,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,65536,0.049855999648571014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,4096,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3584,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2560,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3072,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2048,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1536,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1024,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,512,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,256,0.0066559999249875546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,128,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,12288,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,16384,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,10240,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,5120,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,8192,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,4096,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,65536,0.0451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,7168,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3584,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3072,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2560,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1536,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1024,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2048,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,512,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,256,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,128,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,12288,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,65536,0.0432640016078949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,10240,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,16384,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,8192,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,7168,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,5120,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,4096,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3584,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2560,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3072,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1536,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2048,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1024,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,512,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,256,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,128,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,12288,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,10240,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,16384,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,8192,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,5120,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,7168,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,65536,0.04211200028657913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,4096,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2048,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2560,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3584,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1536,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3072,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1024,0.006688000168651342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,512,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,256,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,128,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,65536,0.04163200035691261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,4096,0.06825599819421768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,5120,0.08601599931716919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,8192,0.12800000607967377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,7168,0.11532799899578094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,10240,0.1595200002193451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3584,0.061503998935222626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,12288,0.18883199989795685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3072,0.05564799904823303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2560,0.05132799968123436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1024,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,512,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1536,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,256,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2048,0.03843199834227562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,128,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,16384,0.24662399291992188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,7168,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,8192,0.03807999938726425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,5120,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,12288,0.05417599901556969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,10240,0.04668800160288811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,16384,0.06844799965620041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,4096,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2560,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3584,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3072,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2048,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,512,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,256,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,128,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,12288,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,5120,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,8192,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,10240,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,7168,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,16384,0.059808000922203064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,4096,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2560,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3584,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3072,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2048,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1536,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,256,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,65536,0.2552320063114166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,12288,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,7168,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,8192,0.03203200176358223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,10240,0.03788800165057182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,16384,0.05558399856090546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,5120,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3584,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3072,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2560,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2048,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1536,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,512,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,65536,0.21660800278186798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,5120,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,12288,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,7168,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,8192,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,10240,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,16384,0.039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3584,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,4096,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3072,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2560,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2048,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1536,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,65536,0.20553599298000336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,512,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,256,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1024,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,12288,0.02848000079393387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,5120,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,8192,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,7168,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,10240,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,16384,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,4096,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2560,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1536,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3584,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2048,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3072,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,65536,0.13232000172138214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1024,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,512,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,128,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,256,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,12288,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,16384,0.02864000014960766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,8192,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,5120,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,10240,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,7168,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,4096,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3584,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2560,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2048,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,65536,0.11750400066375732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3072,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1536,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,512,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1024,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,12288,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,8192,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,16384,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,5120,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,7168,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,10240,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,4096,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,65536,0.09523200243711472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3584,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3072,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1536,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2048,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2560,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,512,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1024,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,128,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,12288,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,5120,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,10240,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,7168,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,16384,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,8192,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,4096,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,65536,0.08294399827718735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3584,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3072,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1536,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2560,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2048,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1024,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,512,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,128,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,12288,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,16384,0.024159999564290047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,8192,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,7168,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,10240,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,5120,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,65536,0.07452800124883652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,4096,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3584,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2560,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1536,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1024,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2048,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,512,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3072,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,128,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,12288,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,10240,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,5120,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,8192,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,16384,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,7168,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,65536,0.06703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,4096,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3072,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2048,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2560,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1536,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3584,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,512,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1024,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,256,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,12288,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,16384,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,8192,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,10240,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,5120,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,7168,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,4096,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,65536,0.05961599946022034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3072,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3584,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2560,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2048,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1536,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,512,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1024,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,128,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,256,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,12288,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,8192,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,10240,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,16384,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,5120,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,7168,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,4096,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,65536,0.0522879995405674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3072,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3584,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2560,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1536,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1024,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2048,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,512,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,128,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,256,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,12288,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,8192,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,10240,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,16384,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,65536,0.047680001705884933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,7168,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,4096,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,5120,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3072,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3584,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2560,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2048,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1536,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1024,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,512,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,128,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,256,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,12288,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,10240,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,65536,0.04543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,5120,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,16384,0.021663999184966087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,8192,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,4096,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,7168,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3584,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3072,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2560,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1536,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2048,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1024,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,512,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,256,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,12288,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,65536,0.06796800345182419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,8192,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,5120,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,10240,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,7168,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,16384,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,4096,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3584,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2560,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3072,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1536,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1024,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2048,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,512,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,128,0.007424000184983015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,256,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,12288,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,10240,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,8192,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,16384,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,7168,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,65536,0.06876800209283829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,5120,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,4096,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2560,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3584,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2048,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1024,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1536,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3072,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,512,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,256,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,128,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,65536,0.06831999868154526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3584,4.45904016494751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,4096,5.064000129699707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,5120,6.218048095703125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2560,3.2999041080474854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3072,3.8795840740203857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,7168,8.496992111206055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2048,2.693056106567383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,512,0.9277439713478088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,256,0.5021439790725708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,8192,9.652352333068848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,128,0.38841599225997925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1024,1.4947839975357056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1536,2.103935956954956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,10240,3.035871982574463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,10240,11.967935562133789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,8192,2.471359968185425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,12288,3.655168056488037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,7168,2.170464038848877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,5120,1.5813119411468506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,12288,14.4716157913208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3584,1.137984037399292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,4096,1.2872639894485474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2560,0.8344640135765076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3072,0.9797760248184204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2048,0.6866559982299805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,256,0.1316480040550232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,16384,4.799263954162598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,128,0.09935999661684036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1024,0.3891200125217438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,512,0.246848002076149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1536,0.5382080078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,7168,1.6400959491729736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,8192,1.8697600364685059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,10240,2.32041597366333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,12288,2.772671937942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,16384,21.375232696533203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,4096,0.9642879962921143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,5120,1.1972479820251465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3584,0.8536959886550903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2560,0.6331520080566406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3072,0.7429440021514893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2048,0.5181760191917419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1536,0.4082239866256714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1024,0.2996799945831299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,16384,3.643359899520874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,256,0.1048320010304451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,512,0.1897599995136261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,128,0.07795199751853943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,8192,1.529952049255371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,7168,1.3863040208816528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,5120,0.9930880069732666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,10240,1.9475840330123901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,12288,2.441567897796631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,4096,0.805728018283844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3072,0.6159679889678955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3584,0.7135999798774719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2560,0.5271360278129578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1024,0.25433599948883057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1536,0.34300801157951355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,512,0.16009600460529327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2048,0.43753600120544434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,256,0.08972799777984619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,128,0.06595200300216675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,16384,3.4272000789642334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,10240,1.5643199682235718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,12288,1.8826240301132202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,8192,1.2741119861602783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,4096,0.6590399742126465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,5120,0.8132799863815308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,16384,2.474400043487549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,7168,1.1123520135879517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3584,0.5799999833106995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2560,0.4274879992008209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3072,0.5088319778442383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2048,0.3571839928627014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1536,0.2780799865722656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,128,0.05571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,256,0.07119999825954437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,512,0.12822400033473969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1024,0.20310400426387787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,65536,11.379136085510254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,10240,1.3866239786148071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,12288,1.6384320259094238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,16384,2.2218239307403564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,7168,0.9756799936294556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,5120,0.7165120244026184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,8192,1.1185920238494873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,65536,21.602272033691406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,4096,0.5808960199356079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3072,0.44761601090431213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2048,0.31299200654029846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2560,0.37887999415397644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3584,0.5134400129318237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1536,0.24512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,256,0.066880002617836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,512,0.11497599631547928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,128,0.05036799982190132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1024,0.18041600286960602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,65536,16.788671493530273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,8192,0.7999039888381958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,10240,0.9861760139465332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,12288,1.1832959651947021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,5120,0.5111680030822754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,7168,0.7079359889030457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,65536,13.864224433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,4096,0.41600000858306885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3584,0.37033599615097046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2560,0.27644801139831543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,16384,1.5544639825820923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2048,0.22598400712013245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1536,0.17868800461292267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1024,0.13177600502967834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3072,0.3229439854621887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,256,0.051072001457214355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,512,0.08515200018882751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,128,0.03932800143957138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,7168,0.5634880065917969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,8192,0.6376320123672485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,10240,0.7913280129432678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,12288,0.940608024597168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,4096,0.33542400598526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,5120,0.41071999073028564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3072,0.2579199969768524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3584,0.29840001463890076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,16384,1.2443519830703735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2560,0.2197439968585968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1024,0.10713600367307663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2048,0.18300800025463104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1536,0.14368000626564026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,512,0.06998399645090103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,256,0.041120000183582306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,128,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,65536,10.09171199798584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,8192,0.5600000023841858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,10240,0.6914880275726318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,12288,0.8246719837188721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,7168,0.4933120012283325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,5120,0.36163198947906494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,4096,0.2924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,16384,1.081984043121338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3584,0.2595199942588806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3072,0.2274239957332611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1536,0.12780800461769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2560,0.19404800236225128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1024,0.0947519987821579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2048,0.1621440052986145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,128,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,256,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,512,0.06332799792289734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,8192,0.48073598742485046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,65536,7.236639976501465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,10240,0.5954239964485168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,16384,0.9283519983291626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,12288,0.7081279754638672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,4096,0.2543039917945862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3072,0.19606399536132812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3584,0.22383999824523926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,5120,0.3111039996147156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,7168,0.4238080084323883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1536,0.11104000359773636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,512,0.05580800026655197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2048,0.1393280029296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1024,0.0828159973025322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2560,0.1674560010433197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,256,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,128,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,8192,0.4034239947795868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,10240,0.49689599871635437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,65536,5.842624187469482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,12288,0.5916159749031067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,16384,0.7760319709777832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3584,0.18812799453735352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,4096,0.21196800470352173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,5120,0.2598400115966797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,7168,0.3521600067615509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1024,0.0703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2048,0.1173119992017746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1536,0.09398400038480759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2560,0.1409599930047989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3072,0.1642879992723465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,128,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,256,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,512,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,65536,4.976096153259277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,8192,0.32041600346565247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,10240,0.3994239866733551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,12288,0.4749760031700134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,16384,0.6197119951248169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,7168,0.2853440046310425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,5120,0.20815999805927277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,4096,0.17097599804401398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3584,0.15158399939537048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2048,0.09574399888515472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3072,0.1327040046453476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1536,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2560,0.115167997777462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1024,0.05788800120353699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,256,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,512,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,128,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,65536,4.268608093261719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,8192,0.24294400215148926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,10240,0.30185601115226746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,7168,0.21510399878025055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,12288,0.35974401235580444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,5120,0.15865600109100342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,4096,0.1308480054140091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,16384,0.46886399388313293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3072,0.10143999755382538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3584,0.11680000275373459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,512,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1024,0.04310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2560,0.08796799927949905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1536,0.0578560009598732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2048,0.07289600372314453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,256,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,128,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,65536,3.7101120948791504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,8192,0.16601599752902985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,10240,0.20310400426387787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,12288,0.23916800320148468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,7168,0.14659200608730316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,5120,0.10873600095510483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,16384,0.31648001074790955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,4096,0.09097599983215332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3584,0.08073599636554718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1024,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1536,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2560,0.06035200133919716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2048,0.05129599943757057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3072,0.07148800045251846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,512,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,128,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,256,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,65536,2.936415910720825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,8192,0.09244800359010696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,10240,0.10623999685049057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,12288,0.1252480000257492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,7168,0.07846400141716003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,5120,0.060896001756191254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,4096,0.050016000866889954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,16384,0.16543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3072,0.040832001715898514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3584,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2560,0.03542400151491165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,512,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1024,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1536,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2048,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,256,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,128,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,65536,2.112031936645508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,8192,0.05648000165820122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,10240,0.06646399945020676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,12288,0.07631999999284744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,5120,0.039744000881910324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,7168,0.050144001841545105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,4096,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3584,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,16384,0.09833600372076035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,65536,1.4239039421081543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1024,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1536,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2048,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2560,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,512,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3072,0.027904000133275986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,128,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,256,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,7168,0.04047999903559685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,8192,0.04540799930691719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,10240,0.05222399905323982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,12288,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,4096,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,5120,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3584,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3072,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,16384,0.07583999633789062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2048,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2560,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,512,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1536,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1024,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,128,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,256,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,65536,0.7095999717712402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,10240,5.989535808563232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,65536,0.36822399497032166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,4096,2.5381760597229004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,5120,3.1256000995635986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,7168,4.272096157073975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3584,2.246079921722412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,8192,4.857696056365967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,65536,0.3023360073566437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2560,1.6561919450759888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3072,1.9507520198822021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,512,0.4708159863948822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,128,0.19923199713230133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1024,0.7584319710731506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,256,0.24723200500011444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2048,1.3642879724502563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1536,1.0568000078201294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,10240,1.5631680488586426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,5120,0.8026880025863647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,12288,7.161920070648193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,7168,1.1114879846572876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,8192,1.2628799676895142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3072,0.5024319887161255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3584,0.5829759836196899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,4096,0.6615039706230164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2560,0.4307839870452881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,12288,1.880128026008606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1536,0.2802239954471588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2048,0.35760000348091125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,256,0.06860800087451935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,512,0.12931199371814728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,128,0.054687999188899994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1024,0.20387199521064758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,16384,2.4714879989624023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,10240,1.1771520376205444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,16384,9.491904258728027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,8192,0.9510400295257568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,65536,11.107839584350586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,5120,0.6107519865036011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,12288,1.4163199663162231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,7168,0.8377280235290527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,4096,0.49878400564193726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3584,0.4394879937171936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3072,0.38329601287841797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2560,0.32416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1536,0.21299199759960175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,512,0.0984639972448349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1024,0.1560640037059784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2048,0.2699519991874695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,256,0.05750399827957153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,128,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,16384,1.8574399948120117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,10240,0.9738559722900391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,5120,0.5102400183677673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,7168,0.6995519995689392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,8192,0.7930560111999512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3584,0.36739200353622437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,4096,0.4139840006828308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,12288,1.1814080476760864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2560,0.2712000012397766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3072,0.32102400064468384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1536,0.1791359931230545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2048,0.22537599503993988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,256,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,512,0.08156800270080566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1024,0.1321599930524826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,128,0.03759999945759773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,16384,1.5512640476226807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,8192,0.6374080181121826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,10240,0.7884799838066101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,12288,0.9383040070533752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,7168,0.5610880255699158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,16384,1.2408959865570068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,5120,0.4104959964752197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3584,0.2943040132522583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3072,0.2577280104160309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,4096,0.3370560109615326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2560,0.21964800357818604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1024,0.10790400207042694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,512,0.06678400188684464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,256,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2048,0.1823039948940277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1536,0.14451199769973755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,128,0.031968001276254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,10240,0.6857920289039612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,12288,0.8171200156211853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,8192,0.5591040253639221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,16384,1.0788480043411255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,4096,0.2945599853992462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,5120,0.36000001430511475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,7168,0.4917120039463043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3584,0.2596159875392914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2560,0.19385600090026855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2048,0.16051200032234192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3072,0.2269439995288849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,512,0.05782400071620941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1536,0.12803199887275696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1024,0.10764800012111664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,256,0.03759999945759773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,128,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,65536,6.487391948699951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,10240,0.4923200011253357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,16384,0.7743679881095886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,8192,0.40140798687934875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,12288,0.5913919806480408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,5120,0.25884801149368286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,4096,0.2110079973936081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,7168,0.35465601086616516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,65536,7.555744171142578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3584,0.18928000330924988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3072,0.16460800170898438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2560,0.14105600118637085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1536,0.09385599941015244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2048,0.11724799871444702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,512,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1024,0.07088000327348709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,256,0.028831999748945236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,128,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,10240,0.39635199308395386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,8192,0.323199987411499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,12288,0.4745599925518036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,7168,0.28409600257873535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,65536,4.9133758544921875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,16384,0.6220800280570984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,5120,0.20902399718761444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,4096,0.17123199999332428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2560,0.11513599753379822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3584,0.15244799852371216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3072,0.1329600065946579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1536,0.07760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2048,0.09619200229644775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1024,0.05753599852323532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,512,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,256,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,128,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,65536,4.272960186004639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,8192,0.2871040105819702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,10240,0.351967990398407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,12288,0.415039986371994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,7168,0.2547839879989624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,5120,0.19011199474334717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,4096,0.1568319946527481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,16384,0.5414080023765564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3584,0.141184002161026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1024,0.05798399820923805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3072,0.12511999905109406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1536,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2048,0.09196799993515015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2560,0.11001600325107574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,128,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,256,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,512,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,65536,3.4233601093292236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,8192,0.24236799776554108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,12288,0.3548479974269867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,10240,0.302047997713089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,16384,0.46886399388313293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,65536,2.4448959827423096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,7168,0.2146880030632019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,5120,0.16115200519561768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,4096,0.13040000200271606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2048,0.0735040009021759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3584,0.11708799749612808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2560,0.08892799913883209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3072,0.10291200131177902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1536,0.05967999994754791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,512,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1024,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,128,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,256,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,7168,0.18636800348758698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,8192,0.21001599729061127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,10240,0.25628799200057983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,12288,0.30140799283981323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,4096,0.11894399672746658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,16384,0.3935999870300293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,5120,0.14124800264835358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3584,0.10841599851846695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3072,0.09683199971914291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2048,0.0724480003118515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1536,0.05999999865889549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,512,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2560,0.08419200032949448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1024,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,256,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,128,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,65536,2.071199893951416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,10240,0.20207999646663666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,8192,0.16595199704170227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,12288,0.24025599658489227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,7168,0.14723199605941772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,5120,0.1103999987244606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,16384,0.31465598940849304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,4096,0.09071999788284302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3584,0.08099199831485748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3072,0.07126399874687195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2560,0.06319999694824219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1024,0.033440001308918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1536,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2048,0.052799999713897705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,512,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,256,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,128,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,65536,1.8532480001449585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,8192,0.1342719942331314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,12288,0.1881919950246811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,10240,0.16201600432395935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,7168,0.12003199756145477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,16384,0.24268800020217896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3584,0.07142399996519089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,5120,0.09324800223112106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,4096,0.07897599786520004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3072,0.06431999802589417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1024,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2560,0.05663999915122986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,512,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1536,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2048,0.04992000013589859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,256,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,128,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,65536,1.6752640008926392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,8192,0.14416000247001648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,12288,0.12476799637079239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,10240,0.175135999917984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,7168,0.0777600035071373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,5120,0.059167999774217606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3584,0.06841599941253662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,16384,0.1624639928340912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3072,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,4096,0.04975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,512,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,65536,1.2293119430541992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1536,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1024,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2560,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2048,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,128,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,256,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,65536,0.8957440257072449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,7168,0.04614400118589401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,8192,0.05097600072622299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,10240,0.06204799935221672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,12288,0.07094399631023407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2560,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3072,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3584,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,4096,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,16384,0.0910400003194809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,5120,0.03699199855327606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,512,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1024,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1536,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,256,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2048,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,7168,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,8192,0.041152000427246094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,10240,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,12288,0.053247999399900436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,4096,0.026176000013947487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,5120,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,16384,0.06419199705123901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3072,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3584,0.023231999948620796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,65536,0.637503981590271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2048,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1536,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2560,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,512,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1024,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,8192,0.03404799848794937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,7168,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,10240,0.04201599955558777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,12288,0.04841599985957146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,4096,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3584,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,5120,0.02518399991095066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,16384,0.05539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3072,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,65536,0.31679999828338623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1536,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,512,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2048,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2560,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,16384,4.7815680503845215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,65536,0.18979200720787048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,5120,1.5743999481201172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,7168,2.141439914703369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3584,1.1315840482711792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,4096,1.2776960134506226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,65536,0.16051200032234192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,8192,2.4334399700164795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3072,0.9875199794769287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,512,0.23123200237751007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1024,0.3906239867210388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1536,0.5376319885253906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,256,0.1276479959487915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2048,0.6887680292129517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,128,0.10252799838781357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,10240,3.0252161026000977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2560,0.8358399868011475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,8192,0.6346560120582581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,12288,0.9361600279808044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,12288,3.610208034515381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,16384,1.236575961112976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,5120,0.40959998965263367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3584,0.2950719892978668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,4096,0.3314560055732727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,7168,0.5625280141830444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3072,0.2587839961051941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2560,0.2199680060148239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,512,0.0607680007815361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2048,0.18111999332904816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1024,0.1064319983124733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,256,0.039423998445272446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1536,0.14454400539398193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,10240,0.7883520126342773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,128,0.03292800113558769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,5120,0.3094080090522766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,7168,0.4214079976081848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,4096,0.2524160146713257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,8192,0.48051199316978455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3584,0.22409600019454956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,10240,0.5946879982948303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,12288,0.7080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3072,0.1963520050048828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2048,0.13884800672531128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2560,0.16780799627304077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,256,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,512,0.04915200173854828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1536,0.11190400272607803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1024,0.08326400071382523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,128,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,16384,0.925823986530304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,8192,0.40195199847221375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,7168,0.3551360070705414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,10240,0.49647998809814453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,12288,0.5903040170669556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,16384,0.7742720246315002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,5120,0.2587519884109497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,4096,0.21190400421619415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3584,0.18771199882030487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1024,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3072,0.1648319959640503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2048,0.11804799735546112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2560,0.1414400041103363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1536,0.09481599926948547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,512,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,128,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,256,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,12288,0.472351998090744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,10240,0.39552000164985657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,8192,0.32416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,16384,0.6248639822006226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,5120,0.20899200439453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,4096,0.1719360053539276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,7168,0.28697600960731506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,65536,3.0299839973449707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3584,0.1520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2560,0.11449600011110306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3072,0.1329279989004135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2048,0.09558399766683578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1024,0.05462399870157242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1536,0.07683199644088745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,512,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,256,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,128,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,10240,0.3476479947566986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,12288,0.41200000047683716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,65536,4.8763837814331055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,8192,0.2848320007324219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3584,0.1409280002117157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,65536,2.440768003463745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,5120,0.19126400351524353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,4096,0.15769599378108978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,16384,0.5395519733428955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,7168,0.25603199005126953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3072,0.1255359947681427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2048,0.0923520028591156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1536,0.0806720033288002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,512,0.03248000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1024,0.05104000121355057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2560,0.10976000130176544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,256,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,128,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,65536,3.6610240936279297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,8192,0.21004800498485565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,7168,0.18688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,5120,0.14217600226402283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,16384,0.3944000005722046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,12288,0.3033280074596405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,10240,0.2569279968738556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3584,0.10719999670982361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3072,0.095551997423172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,4096,0.11900799721479416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2048,0.06719999760389328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1024,0.038336001336574554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1536,0.05071999877691269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,256,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2560,0.07628799974918365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,512,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,128,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,8192,0.16556799411773682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,12288,0.24143999814987183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,7168,0.14691199362277985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,10240,0.20428800582885742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3584,0.08105599880218506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,4096,0.09004800021648407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,5120,0.10902400314807892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3072,0.0711359977722168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,16384,0.31644800305366516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2048,0.05193600058555603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2560,0.06259199976921082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1536,0.04201599955558777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1024,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,512,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,256,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,128,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,65536,1.5240960121154785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,65536,2.129055976867676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,8192,0.16601599752902985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,12288,0.24083200097084045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,10240,0.20371200144290924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,7168,0.14668799936771393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,16384,0.3152320086956024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2560,0.061792001128196716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,5120,0.1090880036354065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3072,0.07209599763154984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,4096,0.08982399851083755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3584,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2048,0.05190400034189224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1024,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,512,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,256,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1536,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,128,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,7168,0.12035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,8192,0.1340160071849823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,10240,0.2070399969816208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,12288,0.1881919950246811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,65536,1.2280000448226929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,4096,0.0791039988398552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3072,0.06534399837255478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3584,0.06406400352716446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,5120,0.09305600076913834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,16384,0.2428160011768341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2560,0.05827200040221214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2048,0.05075199902057648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1024,0.03551999852061272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1536,0.037376001477241516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,512,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,256,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,128,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,8192,0.11497599631547928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,12288,0.16073599457740784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,7168,0.1043199971318245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,10240,0.13846400380134583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,16384,0.20576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,4096,0.06707199662923813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,5120,0.08131200075149536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3584,0.06111999973654747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3072,0.057312000542879105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2560,0.049215998500585556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2048,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1536,0.03356799855828285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1024,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,512,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,65536,1.21452796459198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,256,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,128,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,65536,0.9006400108337402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,7168,0.08022399991750717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,8192,0.11088000237941742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,12288,0.12518399953842163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,5120,0.05907199904322624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,10240,0.10611200332641602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,16384,0.1629440039396286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,4096,0.04927999898791313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3584,0.045632001012563705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2560,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3072,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2048,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,256,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1024,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,512,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,128,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1536,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,12288,0.12425599992275238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,7168,0.07772800326347351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,10240,0.10534399747848511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,8192,0.08710400015115738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3584,0.044415999203920364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,16384,0.1616320013999939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3072,0.04047999903559685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,4096,0.049247998744249344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,5120,0.05881600081920624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,512,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,65536,0.7511360049247742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2560,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1024,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2048,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1536,0.0261439997702837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,256,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,128,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,12288,0.07075200229883194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,10240,0.07622399926185608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,7168,0.07558400183916092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,8192,0.050016000866889954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,16384,0.11423999816179276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,5120,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3072,0.03638400137424469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3584,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,4096,0.044224001467227936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,65536,0.6115840077400208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2560,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,512,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1024,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2048,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1536,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,256,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,12288,0.05539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,10240,0.04694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,8192,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,7168,0.04505600035190582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,16384,0.06380800157785416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,65536,0.5916479825973511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3584,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,4096,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3072,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,5120,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1024,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2048,0.021663999184966087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,512,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2560,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1536,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,65536,0.382176011800766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,10240,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,8192,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,12288,0.043327998369932175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,7168,0.0307839997112751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,16384,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3584,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3072,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2560,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,4096,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,5120,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2048,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1024,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,7168,0.02723200060427189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,65536,0.21571199595928192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,8192,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,12288,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,10240,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,16384,0.040192000567913055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,5120,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,4096,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3584,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3072,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2560,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1024,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,65536,0.11686400324106216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,5120,0.814624011516571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,4096,0.664031982421875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3584,0.5785599946975708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,7168,1.107807993888855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,65536,0.09942399710416794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,8192,1.2598719596862793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1536,0.27929601073265076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3072,0.5088319778442383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,10240,1.557088017463684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2560,0.4326399862766266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1024,0.18723200261592865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2048,0.3567039966583252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,256,0.0682239979505539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,512,0.10838399827480316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,128,0.054976001381874084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,12288,1.8602240085601807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,7168,0.28412801027297974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,8192,0.3216319978237152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,10240,0.3961920142173767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,16384,0.6214720010757446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,5120,0.20790399610996246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3584,0.1520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3072,0.13289600610733032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,4096,0.17174400389194489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2560,0.11443199962377548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,12288,0.47788798809051514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2048,0.0939520001411438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1536,0.07363200187683105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1024,0.05385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,512,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,128,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,256,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,16384,2.45686411857605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,12288,0.3558399975299835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,7168,0.21660800278186798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,10240,0.30054399371147156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,16384,0.4678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,5120,0.15939199924468994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,8192,0.24425600469112396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3584,0.11740799993276596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,4096,0.13104000687599182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2560,0.08790399879217148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3072,0.10208000242710114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2048,0.07203199714422226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1536,0.05910399928689003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,256,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,512,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1024,0.043327998369932175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,128,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,65536,2.4194560050964355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,12288,0.30239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,8192,0.2096640020608902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,10240,0.25699201226234436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,5120,0.13504000008106232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,16384,0.3924480080604553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,7168,0.18854400515556335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,4096,0.1178240031003952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3072,0.0859839990735054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3584,0.0989760011434555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2048,0.06255999952554703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2560,0.0745600014925003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1536,0.05020799860358238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1024,0.03763199970126152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,512,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,128,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,65536,1.8212800025939941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,256,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,5120,0.1090560033917427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,10240,0.20281599462032318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,12288,0.2431039959192276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,7168,0.1467839926481247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,8192,0.16582399606704712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,16384,0.3163839876651764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3072,0.07043199986219406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,4096,0.08988799899816513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3584,0.08012799918651581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2048,0.05087999999523163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2560,0.06054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1536,0.04124800115823746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1024,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,512,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,256,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,128,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,65536,1.4945600032806396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,12288,0.2393919974565506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,8192,0.16438399255275726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,7168,0.14665600657463074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,10240,0.20425599813461304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,5120,0.10838399827480316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,4096,0.08912000060081482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3584,0.08051200211048126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3072,0.0713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2048,0.05097600072622299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2560,0.061503998935222626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,16384,0.3158720135688782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1024,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1536,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,512,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,128,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,256,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,65536,1.2142399549484253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,7168,0.10275200009346008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,8192,0.14473600685596466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,12288,0.16035200655460358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,10240,0.13769599795341492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,5120,0.08108799904584885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,16384,0.2685759961605072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,4096,0.08204799890518188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3072,0.05584000051021576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3584,0.0735040009021759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2560,0.05500800162553787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2048,0.04854400083422661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1536,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,512,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1024,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,256,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,128,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,7168,0.07756800204515457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,8192,0.08796799927949905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,16384,0.16889600455760956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,12288,0.12432000041007996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,10240,0.10684800148010254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,5120,0.05974400043487549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3072,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3584,0.044863998889923096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,4096,0.04975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2560,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2048,0.030208000913262367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1024,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1536,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,512,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,256,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,65536,1.0112320184707642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,128,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,65536,1.2188160419464111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,12288,0.12441600114107132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,7168,0.07804799824953079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,8192,0.08710400015115738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,16384,0.1619199961423874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,10240,0.10620799660682678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,5120,0.0589120015501976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,4096,0.04940799996256828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3072,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3584,0.04479999840259552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2560,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,65536,0.6110399961471558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2048,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1536,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1024,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,256,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,512,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,128,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,5120,0.05958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,12288,0.12415999919176102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,8192,0.08726400136947632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,7168,0.08009599894285202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,10240,0.1064319983124733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,16384,0.16182400286197662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,4096,0.04960000142455101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3584,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3072,0.039872001856565475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2560,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2048,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1536,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1024,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,256,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,512,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,128,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,65536,0.6115520000457764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,7168,0.06934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,12288,0.10655999928712845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,5120,0.054336000233888626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,8192,0.07676800340414047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,10240,0.09244800359010696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,16384,0.13619199395179749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3584,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3072,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,4096,0.046751998364925385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2048,0.030592000111937523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2560,0.03551999852061272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1536,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,512,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1024,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,256,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,128,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,65536,0.586624026298523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,12288,0.0880960002541542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,7168,0.04467200115323067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,10240,0.0589120015501976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,8192,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,16384,0.08876799792051315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,5120,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,4096,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3072,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2560,0.022655999287962914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2048,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3584,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1536,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1024,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,512,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,256,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,12288,0.06659200042486191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,7168,0.044415999203920364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,10240,0.058079998940229416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,8192,0.049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,65536,0.49139198660850525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,16384,0.0790719985961914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,5120,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2048,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,4096,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3584,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2560,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3072,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1536,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,65536,0.31494399905204773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1024,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,512,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,12288,0.055615998804569244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,5120,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,7168,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,8192,0.03999999910593033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,10240,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,16384,0.06451199948787689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3584,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,4096,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3072,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2048,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1536,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2560,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,512,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1024,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,65536,0.2441280037164688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,7168,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,8192,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,12288,0.0387520007789135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,10240,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,16384,0.04416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,5120,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3584,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,4096,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3072,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2560,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2048,0.021856000646948814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1536,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,65536,0.18131199479103088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,12288,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,7168,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,8192,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,16384,0.03683200106024742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,10240,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,5120,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,4096,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3584,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2560,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1536,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3072,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1024,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,65536,0.1181119978427887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,5120,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,12288,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,7168,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,10240,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,8192,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,16384,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3072,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2560,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,512,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,65536,0.09484799951314926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,256,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,4096,0.4942399859428406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,7168,0.8369600176811218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,65536,0.06537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,5120,0.6119999885559082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3072,0.38652798533439636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3584,0.44489601254463196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1024,0.36633598804473877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2560,0.3314880132675171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1536,0.21353599429130554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,512,0.0912960022687912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,8192,0.9451839923858643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2048,0.270687997341156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,256,0.059007998555898666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,128,0.054687999188899994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,12288,0.35679998993873596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,10240,1.1792320013046265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,8192,0.2447039932012558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,16384,0.7756800055503845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,12288,1.3969919681549072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,7168,0.21590399742126465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,10240,0.3004159927368164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,4096,0.130048006772995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,5120,0.15964800119400024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2560,0.08828800171613693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3072,0.10236799716949463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3584,0.11708799749612808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1536,0.057760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1024,0.04310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,512,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2048,0.18095999956130981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,128,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,256,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,12288,0.27401599287986755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,7168,0.17103999853134155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,8192,0.19177600741386414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,16384,1.8490560054779053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,16384,0.3558720052242279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3584,0.09596800059080124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,4096,0.10655999928712845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3072,0.0862400010228157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,10240,0.23286400735378265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2560,0.07526399940252304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,5120,0.12857599556446075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,65536,1.824512004852295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2048,0.06204799935221672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1536,0.05011200159788132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,256,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1024,0.03766399994492531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,512,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,128,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,12288,0.24031999707221985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,5120,0.10950399935245514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,10240,0.2033279985189438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,8192,0.16582399606704712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,7168,0.1472959965467453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,16384,0.549344003200531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3584,0.0806720033288002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,4096,0.08985599875450134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3072,0.15411199629306793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1536,0.041471999138593674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2560,0.06143999844789505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1024,0.031808000057935715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,256,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2048,0.05158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,512,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,128,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,12288,0.1884479969739914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,7168,0.14774399995803833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,8192,0.13488000631332397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,5120,0.09273599833250046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,10240,0.1613440066576004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,16384,0.31542399525642395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,4096,0.07913599908351898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3072,0.05580800026655197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2560,0.04851200059056282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3584,0.07103999704122543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2048,0.050944000482559204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1536,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1024,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,128,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,512,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,256,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,12288,0.16896000504493713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,16384,0.21583999693393707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,8192,0.11919999867677689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,10240,0.14441600441932678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,7168,0.10812799632549286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,5120,0.11036799848079681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,4096,0.07023999840021133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3072,0.07196799665689468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3584,0.06499200314283371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,65536,1.346336007118225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2560,0.06272000074386597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2048,0.04188799858093262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1536,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1024,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,256,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,512,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,128,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,65536,0.9064000248908997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,12288,0.1244800016283989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,65536,1.2137600183486938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,10240,0.1416960060596466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,8192,0.11574400216341019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,65536,1.4048960208892822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,16384,0.1627199947834015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,5120,0.059039998799562454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,7168,0.10195200145244598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3584,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3072,0.05100800096988678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2560,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,4096,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2048,0.04912000149488449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,128,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,512,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1024,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1536,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,256,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,5120,0.05881600081920624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,7168,0.07958400249481201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,8192,0.0894400030374527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,16384,0.35756799578666687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,10240,0.10627199709415436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,12288,0.12464000284671783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,4096,0.05113599821925163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3584,0.04412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3072,0.040383998304605484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2560,0.03551999852061272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2048,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1536,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,65536,0.8361600041389465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1024,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,512,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,256,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,128,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,5120,0.05846399813890457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,12288,0.12451200187206268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,8192,0.08668799698352814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,7168,0.07734400033950806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,10240,0.10582400113344193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,16384,0.16131199896335602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,4096,0.04902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3072,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3584,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2560,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2048,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1536,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,512,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1024,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,128,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,65536,0.6679999828338623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,12288,0.12326399981975555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,8192,0.0867839977145195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,7168,0.07913599908351898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,10240,0.1048320010304451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,5120,0.05907199904322624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,16384,0.12444800138473511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,4096,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3584,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3072,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1536,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2048,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2560,0.034623999148607254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1024,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,512,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,256,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,128,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,12288,0.08528000116348267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,65536,0.6466559767723083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,7168,0.053888000547885895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,10240,0.07203199714422226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,8192,0.059967998415231705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,5120,0.041728001087903976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,16384,0.10982400178909302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,4096,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3072,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2560,0.03577600046992302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3584,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2048,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1536,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,512,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1024,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,65536,0.44464001059532166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,12288,0.08787199854850769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,5120,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,7168,0.06444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,10240,0.05859199911355972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,8192,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,16384,0.08736000210046768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,4096,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3584,0.03667199984192848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,65536,0.4094400107860565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2560,0.022336000576615334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3072,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2048,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1536,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1024,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,256,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,128,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,8192,0.04879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,12288,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,5120,0.03407999873161316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,10240,0.05459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,7168,0.043807998299598694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,16384,0.0692799985408783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2048,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3072,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1536,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,4096,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2560,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3584,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,512,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1024,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,128,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,65536,0.24422399699687958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,12288,0.0453759990632534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,8192,0.038943998515605927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,5120,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,7168,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,16384,0.0644799992442131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,10240,0.04131200164556503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3072,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,4096,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2560,0.025087999179959297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1536,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3584,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2048,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,65536,0.1937599927186966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1024,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,512,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,12288,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,8192,0.031808000057935715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,7168,0.032607998698949814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,10240,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,5120,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,16384,0.0432640016078949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,4096,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2560,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3584,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3072,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,65536,0.1682240068912506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,12288,0.03167999908328056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,8192,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,7168,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,5120,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,16384,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,10240,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3584,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3072,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2048,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,65536,0.09699200093746185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,12288,0.02739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,7168,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,8192,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,5120,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,10240,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,16384,0.031007999554276466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,4096,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3072,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3584,0.019392000511288643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,65536,0.07814399898052216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2560,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,5120,0.408735990524292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,4096,0.33315199613571167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,12288,0.9376000165939331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,8192,0.6340479850769043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,7168,0.5560640096664429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,65536,0.05929600074887276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2048,0.17580799758434296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3072,0.25171199440956116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1536,0.13654400408267975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,16384,1.232319951057434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,512,0.0589120015501976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,256,0.038784001022577286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,128,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2560,0.21478399634361267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1024,0.0966079980134964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3584,0.2967680096626282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,12288,0.24003200232982635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,8192,0.16630400717258453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,10240,0.20281599462032318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,7168,0.14617599546909332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,5120,0.10838399827480316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,10240,0.791808009147644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3584,0.08079999685287476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,4096,0.09036800265312195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2560,0.06067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2048,0.051263999193906784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,16384,0.31494399905204773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3072,0.07065600156784058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1536,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1024,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,512,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,128,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,256,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,8192,0.12880000472068787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,5120,0.08457600325345993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,7168,0.11350400000810623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,12288,0.18697600066661835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,10240,0.1607999950647354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,4096,0.06969600170850754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,16384,0.24323199689388275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3072,0.055615998804569244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,65536,1.2171839475631714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3584,0.06265600025653839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2048,0.04044799879193306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1536,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2560,0.04835199937224388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1024,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,256,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,128,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,512,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,12288,0.16089600324630737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,8192,0.14444799721240997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,10240,0.17238399386405945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,7168,0.10310400277376175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,16384,0.2685439884662628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,5120,0.09737599641084671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,4096,0.08108799904584885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,65536,0.8967360258102417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3072,0.055424001067876816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2048,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3584,0.07417599856853485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2560,0.051072001457214355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1024,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,512,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,128,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,256,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1536,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,10240,0.10777600109577179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,12288,0.12444800138473511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,8192,0.08963199704885483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,7168,0.07772800326347351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,5120,0.0594559982419014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,16384,0.16313600540161133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2560,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3072,0.040383998304605484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,4096,0.04944000020623207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3584,0.044544000178575516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2048,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,256,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,512,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1024,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1536,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,128,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,7168,0.07945600152015686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,8192,0.08835200220346451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,10240,0.10969600081443787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,12288,0.12371200323104858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,16384,0.16220800578594208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3584,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,4096,0.05008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,65536,0.6114559769630432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3072,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,5120,0.05907199904322624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2560,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2048,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,512,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1536,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,65536,0.7537919878959656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,128,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1024,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,256,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,5120,0.05990400165319443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,12288,0.12438400089740753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,8192,0.08835200220346451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,7168,0.08207999914884567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,10240,0.10553599894046783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,16384,0.16185599565505981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3072,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,4096,0.05052800104022026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2048,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3584,0.04467200115323067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2560,0.03542400151491165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1536,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1024,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,256,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,512,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,128,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,12288,0.08793599903583527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,8192,0.049375999718904495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,10240,0.08777599781751633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,7168,0.06307200342416763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,16384,0.08860799670219421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,65536,0.6133440136909485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,5120,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3584,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,4096,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3072,0.02518399991095066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2560,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,65536,0.4983679950237274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2048,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1536,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1024,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,128,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,512,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,256,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,12288,0.06889600306749344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,5120,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,10240,0.058687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,8192,0.04934399947524071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,7168,0.04428799822926521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,16384,0.08748800307512283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,4096,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3072,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2048,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,65536,0.315744012594223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2560,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3584,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,512,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1024,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1536,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,256,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,12288,0.08777599781751633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,10240,0.05852799862623215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,5120,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,7168,0.06384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,8192,0.04879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,16384,0.11315199732780457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,4096,0.040800001472234726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3584,0.035999998450279236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2560,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3072,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1536,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2048,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1024,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,512,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,65536,0.2813119888305664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,12288,0.06191999837756157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,16384,0.07292799651622772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,10240,0.0551999993622303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,7168,0.04416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,8192,0.04899200052022934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,4096,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,5120,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,65536,0.24473600089550018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3072,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3584,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2560,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2048,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1024,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1536,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,512,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,256,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,12288,0.05315199866890907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,8192,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,7168,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,10240,0.04902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,5120,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,16384,0.06758400052785873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3584,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,4096,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3072,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,65536,0.21216000616550446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2560,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2048,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1536,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1024,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,12288,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,5120,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,10240,0.04102399945259094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,16384,0.054368000477552414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,7168,0.035232000052928925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,8192,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3584,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,4096,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2560,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2048,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3072,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1536,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,65536,0.21644799411296844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,12288,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,16384,0.0530879981815815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,5120,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,8192,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,10240,0.039712000638246536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,7168,0.03155200183391571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,4096,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3584,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2560,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1536,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2048,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3072,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,65536,0.14508800208568573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,512,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,256,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,12288,0.038656000047922134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,8192,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,5120,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,16384,0.03731200098991394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,10240,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,7168,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,4096,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,65536,0.12092799693346024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3072,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2560,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,512,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1024,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,12288,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,8192,0.028672000393271446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,7168,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,10240,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,5120,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,16384,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,4096,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,65536,0.08073599636554718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3072,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2560,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1024,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,8192,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,10240,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,5120,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,7168,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,12288,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,16384,0.029152000322937965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,4096,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3584,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2560,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3072,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1536,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,65536,0.061535999178886414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,4096,0.3341439962387085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,8192,0.6822720170021057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,10240,0.8005759716033936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3584,0.34438401460647583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,65536,0.054016001522541046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,16384,1.4264320135116577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3072,0.5638399720191956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2560,0.4480319917201996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,5120,0.43270400166511536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1536,0.2828800082206726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2048,0.37987199425697327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1024,0.1841920018196106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,128,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,512,0.09673599898815155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,256,0.046911999583244324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,7168,0.6501759886741638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,5120,0.10976000130176544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,8192,0.16601599752902985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,7168,0.14739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,10240,0.22111999988555908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,16384,0.3340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,12288,0.2561280131340027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,12288,1.0030399560928345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2048,0.09324800223112106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3584,0.08064000308513641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3072,0.07094399631023407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,4096,0.09033600240945816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2560,0.06163199990987778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1536,0.07135999947786331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,512,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,256,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1024,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,128,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,5120,0.16380800306797028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,7168,0.21609599888324738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,8192,0.24713599681854248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,16384,0.2582719922065735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,12288,0.3800959885120392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,10240,0.31865599751472473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2560,0.0854720026254654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3072,0.10208000242710114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3584,0.12281599640846252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,4096,0.1356479972600937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2048,0.06940799951553345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1024,0.037567999213933945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1536,0.05283199995756149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,512,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,256,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,128,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,12288,0.16678400337696075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,8192,0.11791999638080597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,7168,0.12080000340938568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,10240,0.14070400595664978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,16384,0.22111999988555908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,5120,0.0764480009675026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3072,0.050144001841545105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,4096,0.08131200075149536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2560,0.05459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3584,0.06659200042486191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2048,0.06480000168085098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1536,0.04057599976658821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1024,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,512,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,256,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,128,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,12288,0.2499839961528778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,10240,0.10672000050544739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,16384,0.31811198592185974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,8192,0.08787199854850769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,7168,0.07875200361013412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,65536,0.8359360098838806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,65536,1.3693759441375732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,5120,0.05920000001788139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3584,0.044415999203920364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,4096,0.0498879998922348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2048,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3072,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,65536,0.8967360258102417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2560,0.056671999394893646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1024,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1536,0.03779200091958046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,512,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,128,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,256,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,12288,0.13312000036239624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,8192,0.0915519967675209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,10240,0.11164800077676773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,5120,0.05974400043487549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,7168,0.1496960073709488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,65536,0.6351040005683899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,16384,0.17769600450992584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,4096,0.049855999648571014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3584,0.08390399813652039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2560,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2048,0.042527999728918076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1536,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3072,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1024,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,512,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,256,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,128,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,12288,0.12428800016641617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,8192,0.12950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,10240,0.10540799796581268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,5120,0.08582399785518646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,16384,0.16262400150299072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,7168,0.053408000618219376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,4096,0.035232000052928925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3584,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2048,0.03798399865627289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3072,0.055456001311540604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2560,0.04771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1536,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,512,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1024,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,256,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,128,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,12288,0.08614400029182434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,8192,0.07065600156784058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,16384,0.16387200355529785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,7168,0.06390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,10240,0.10793600231409073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,5120,0.057792000472545624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,65536,0.40992000699043274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,65536,0.6136959791183472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,4096,0.04054399952292442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3584,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2560,0.027904000133275986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3072,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2048,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1024,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1536,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,512,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,256,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,8192,0.08441600203514099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,12288,0.11785600334405899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,5120,0.05632000043988228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,16384,0.09359999746084213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,10240,0.10252799838781357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,7168,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,65536,0.34537601470947266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,4096,0.04694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3584,0.04076800122857094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1536,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2560,0.029952000826597214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3072,0.03561599925160408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2048,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1024,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,512,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,256,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,128,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,12288,0.07289600372314453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,7168,0.054816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,10240,0.08739200234413147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,8192,0.06143999844789505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,16384,0.11270400136709213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,5120,0.05011200159788132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,4096,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2560,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3584,0.03596799820661545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1536,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2048,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3072,0.031808000057935715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1024,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,256,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,128,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,65536,0.29206401109695435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,12288,0.0605119988322258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,10240,0.058368001133203506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,8192,0.06700800359249115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,7168,0.06438399851322174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,16384,0.0753600001335144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,5120,0.04623999819159508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,4096,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3584,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3072,0.028063999488949776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2560,0.027103999629616737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2048,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1024,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1536,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,65536,0.24508799612522125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,128,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,12288,0.04726399853825569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,8192,0.03807999938726425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,5120,0.03519999980926514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,16384,0.05756799876689911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,10240,0.04179200157523155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,7168,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,65536,0.23024000227451324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,4096,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3584,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2560,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2048,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3072,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1536,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1024,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,512,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,128,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,12288,0.04259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,16384,0.04864000156521797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,7168,0.037856001406908035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,5120,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,10240,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,8192,0.03411199897527695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,4096,0.0261439997702837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2560,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3584,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3072,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,65536,0.20112000405788422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1536,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2048,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,512,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,12288,0.03747199848294258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,5120,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,7168,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,8192,0.03385600075125694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,16384,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,10240,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,4096,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2560,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3584,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3072,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1536,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,65536,0.144896000623703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1024,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,12288,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,5120,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,7168,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,16384,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,8192,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,10240,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,4096,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,65536,0.12256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,12288,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,10240,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,7168,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,8192,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,16384,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,65536,0.07321599870920181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3584,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3072,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1024,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,12288,0.02844800055027008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,5120,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,8192,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,10240,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,7168,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,16384,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,65536,0.06774400174617767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,4096,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3584,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,512,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,4096,0.1708800047636032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,5120,0.20950399339199066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,65536,0.047968000173568726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,7168,0.285535991191864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,12288,0.47519999742507935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,8192,0.32233598828315735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,10240,0.398144006729126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2560,0.11283200234174728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3584,0.15110400319099426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1536,0.07264000177383423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1024,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,512,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,16384,0.625823974609375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,128,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3072,0.13091200590133667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2048,0.09203200042247772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,256,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,12288,0.12831999361515045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,8192,0.08876799792051315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,5120,0.058687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,10240,0.10956799983978271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,7168,0.07958400249481201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,16384,0.16841599345207214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,4096,0.04912000149488449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3584,0.04396799951791763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3072,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1536,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2048,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2560,0.03436800092458725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1024,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,512,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,128,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,12288,0.12422399967908859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,5120,0.059487998485565186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,7168,0.07731200009584427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,8192,0.08755200356245041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,10240,0.10604800283908844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,4096,0.04867200180888176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3584,0.04428799822926521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,16384,0.16019199788570404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3072,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2048,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2560,0.03404799848794937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1024,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,65536,0.6149119734764099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1536,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,128,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,256,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,512,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,12288,0.12467200309038162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,5120,0.059808000922203064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,10240,0.10569600015878677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,7168,0.06947200000286102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,16384,0.16204799711704254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,8192,0.07692799717187881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,4096,0.0490880012512207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3584,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2560,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3072,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1536,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2048,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1024,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,256,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,128,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,512,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,12288,0.07046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,16384,0.11475200206041336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,7168,0.04524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,8192,0.0721919983625412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,10240,0.06028800085186958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,5120,0.04870399832725525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,4096,0.04012800008058548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,65536,0.6105599999427795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,65536,0.58815997838974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3072,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2560,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3584,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2048,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1536,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,512,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1024,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,12288,0.06956800073385239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,5120,0.048128001391887665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,8192,0.0713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,10240,0.07478400319814682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,7168,0.04521600157022476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,65536,0.3149760067462921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,4096,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,16384,0.08771199733018875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2560,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3584,0.036031998693943024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3072,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1536,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2048,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1024,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,512,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,12288,0.08700799942016602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,8192,0.062111999839544296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,10240,0.07331199944019318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,7168,0.04399999976158142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,5120,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,16384,0.10931199789047241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,4096,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3072,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2560,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1536,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,65536,0.2795200049877167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3584,0.03587200120091438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2048,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1024,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,512,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,128,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,256,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,12288,0.05379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,5120,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,7168,0.04483199864625931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,8192,0.0416640006005764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,16384,0.0652799978852272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,10240,0.047839999198913574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,65536,0.2110079973936081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,4096,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3584,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3072,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2048,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2560,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1536,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1024,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,512,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,12288,0.050655998289585114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,10240,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,7168,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,8192,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,16384,0.05961599946022034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,5120,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,65536,0.21644799411296844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,4096,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3584,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1536,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3072,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2048,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2560,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1024,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,12288,0.07094399631023407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,16384,0.060416001826524734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,5120,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,8192,0.049695998430252075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,10240,0.05984000116586685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,7168,0.04543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,4096,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,65536,0.16227200627326965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3584,0.029952000826597214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2560,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3072,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1536,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2048,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1024,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,512,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,128,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,12288,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,8192,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,5120,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,7168,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,10240,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,65536,0.1695999950170517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,16384,0.04972799867391586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,4096,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2560,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3072,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3584,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1024,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,512,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,12288,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,7168,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,5120,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,8192,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,16384,0.05289600044488907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,10240,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,4096,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,65536,0.13235199451446533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3584,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3072,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2560,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1536,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1024,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,128,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,12288,0.038336001336574554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,10240,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,5120,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,8192,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,7168,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,16384,0.04444799944758415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,65536,0.11788800358772278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,4096,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1536,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1024,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,12288,0.038336001336574554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,16384,0.045951999723911285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,8192,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,7168,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,10240,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,5120,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,4096,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,65536,0.09737599641084671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2560,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3072,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2048,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1536,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1024,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,12288,0.03587200120091438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,5120,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,7168,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,8192,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,16384,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,10240,0.026176000013947487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,65536,0.09542399644851685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,12288,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,5120,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,16384,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,7168,0.022655999287962914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,8192,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,10240,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,65536,0.06339199841022491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,4096,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2560,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1024,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,12288,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,10240,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,5120,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,65536,0.05452800169587135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,7168,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,8192,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,16384,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,4096,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3584,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,5120,0.21011200547218323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,8192,0.3214080035686493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,12288,0.4731520116329193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,65536,0.04681599885225296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,10240,0.39958399534225464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,4096,0.17107200622558594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,16384,0.6316800117492676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2560,0.20108799636363983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3584,0.1528639942407608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2048,0.16752000153064728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1536,0.12732799351215363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,512,0.04303999990224838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1024,0.08502399921417236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,256,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,128,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,7168,0.2832320034503937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3072,0.24012799561023712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,12288,0.23216000199317932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,4096,0.08150400221347809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,7168,0.12950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,8192,0.15267199277877808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,10240,0.18918399512767792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,5120,0.09734400361776352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,16384,0.30931198596954346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3584,0.04460800066590309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2560,0.05222399905323982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1536,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1024,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,512,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2048,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3072,0.05984000116586685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,256,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,128,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,12288,0.12595200538635254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,8192,0.08748800307512283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,5120,0.058720000088214874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,16384,0.16387200355529785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,7168,0.07766400277614594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,10240,0.10582400113344193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,4096,0.04879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3584,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3072,0.039744000881910324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2048,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,65536,0.7568640112876892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1536,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2560,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1024,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,512,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,256,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,128,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,12288,0.11660800129175186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,5120,0.08025600016117096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,7168,0.07231999933719635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,16384,0.16262400150299072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,10240,0.09340800344944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,8192,0.08214399963617325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,65536,0.6150400042533875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,4096,0.06284800171852112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3584,0.05644800141453743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3072,0.052000001072883606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2560,0.04399999976158142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2048,0.03888000175356865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1536,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1024,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,512,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,256,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,128,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,12288,0.08902399986982346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,5120,0.058240000158548355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,7168,0.07068800181150436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,8192,0.08748800307512283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,10240,0.07548800110816956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,16384,0.11392000317573547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3072,0.03811199963092804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3584,0.04585599899291992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,65536,0.6111680269241333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,4096,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2560,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2048,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1536,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1024,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,512,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,256,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,12288,0.08867199718952179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,7168,0.08217599987983704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,8192,0.08435200154781342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,5120,0.055743999779224396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,16384,0.11481600254774094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,10240,0.07673600316047668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,65536,0.3146879971027374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,4096,0.05071999877691269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3584,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1536,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3072,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2048,0.027103999629616737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2560,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1024,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,512,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,12288,0.08726400136947632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,16384,0.11308799684047699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,5120,0.03638400137424469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,8192,0.061184000223875046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,7168,0.0544000007212162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,10240,0.0759039968252182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,4096,0.043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,65536,0.27961599826812744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3072,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3584,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2048,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2560,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1536,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,512,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1024,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,256,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,12288,0.06905599683523178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,5120,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,16384,0.0671359971165657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,8192,0.041280001401901245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,7168,0.04390399903059006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,10240,0.049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,65536,0.20998400449752808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,4096,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3584,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3072,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2048,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1536,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2560,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1024,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,12288,0.053599998354911804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,10240,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,5120,0.0318400003015995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,8192,0.04556800052523613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,7168,0.04275200143456459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,65536,0.21641600131988525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,16384,0.06099199876189232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,4096,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3584,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2560,0.019392000511288643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2048,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3072,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1536,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1024,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,128,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,12288,0.06963200122117996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,8192,0.04931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,16384,0.06038400158286095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,5120,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,7168,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,10240,0.04572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3072,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3584,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,4096,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2560,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2048,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,65536,0.16102400422096252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1536,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1024,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,512,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,12288,0.047231998294591904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,8192,0.036031998693943024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,5120,0.028031999245285988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,10240,0.04089599847793579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,7168,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,65536,0.15430399775505066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,16384,0.05129599943757057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,4096,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2560,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3584,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1536,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2048,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3072,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,12288,0.03798399865627289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,10240,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,16384,0.04524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,7168,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,8192,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,5120,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,4096,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,65536,0.14531199634075165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3584,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3072,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2048,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2560,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1536,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,256,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,12288,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,7168,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,5120,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,8192,0.027904000133275986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,10240,0.03110400028526783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,16384,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,65536,0.12121599912643433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3584,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3072,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1024,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1536,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,12288,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,16384,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,10240,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,7168,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,8192,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,5120,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,65536,0.09696000069379807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,4096,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3584,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2560,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3072,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1536,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,12288,0.030880000442266464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,65536,0.07740800082683563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,10240,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,8192,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,16384,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,7168,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,5120,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3584,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,12288,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,10240,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,8192,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,7168,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,5120,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,16384,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,65536,0.06649599969387054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3072,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2048,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,12288,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,8192,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,7168,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,5120,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,10240,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,65536,0.04790399968624115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,16384,0.02703999914228916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3072,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,4096,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3584,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1024,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,65536,0.047488000243902206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,5120,0.22281600534915924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,8192,0.34678399562835693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,12288,0.4939520061016083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,10240,0.4338879883289337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3584,0.31174400448799133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,4096,0.18537600338459015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3072,0.2651520073413849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2048,0.21055999398231506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,16384,0.6451839804649353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2560,0.24617600440979004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1536,0.1555519998073578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,256,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,128,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1024,0.10579200088977814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,7168,0.2969279885292053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,512,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,12288,0.1541759967803955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,5120,0.06032000109553337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,7168,0.15724800527095795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,4096,0.05302400141954422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,8192,0.1799360066652298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,10240,0.22041599452495575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,16384,0.3615359961986542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3584,0.08377599716186523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3072,0.06748799979686737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1536,0.04041599854826927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1024,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2048,0.04960000142455101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,512,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,256,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2560,0.03667199984192848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,128,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,12288,0.13843199610710144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,7168,0.0769599974155426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,8192,0.08687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,10240,0.12639999389648438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,5120,0.05929600074887276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3584,0.044863998889923096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3072,0.04041599854826927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,4096,0.0496320016682148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2560,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,16384,0.18198400735855103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2048,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,65536,0.6301440000534058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,128,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,512,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1536,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1024,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,256,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,12288,0.12540799379348755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,5120,0.08627200126647949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,7168,0.133215993642807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,8192,0.13363200426101685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,16384,0.16448000073432922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,10240,0.10499200224876404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,65536,0.6532480120658875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,4096,0.0544000007212162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3072,0.05423999950289726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2560,0.046112000942230225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3584,0.0514880008995533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1536,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2048,0.04153599962592125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1024,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,512,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,256,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,128,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,12288,0.13184000551700592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,10240,0.07561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,8192,0.08505599945783615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,5120,0.06883200258016586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,16384,0.11299200356006622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,7168,0.09542399644851685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,65536,0.6104639768600464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,4096,0.05251200124621391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3072,0.04572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3584,0.0560000017285347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2560,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1536,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2048,0.029791999608278275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1024,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,512,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,256,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,128,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,12288,0.08723200112581253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,10240,0.07558400183916092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,7168,0.08643200248479843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,5120,0.0666240006685257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,8192,0.08828800171613693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,16384,0.11350400000810623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,4096,0.05955199897289276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,65536,0.31439998745918274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3584,0.05008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3072,0.0427200011909008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2560,0.04163200035691261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2048,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1536,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,256,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1024,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,128,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,512,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,12288,0.08604799956083298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,5120,0.05804799869656563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,7168,0.06371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,16384,0.11401599645614624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,8192,0.06137600168585777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,10240,0.07443200051784515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,65536,0.2810240089893341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,4096,0.04899200052022934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3072,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3584,0.04556800052523613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1536,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2560,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,512,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2048,0.028063999488949776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1024,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,256,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,128,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,12288,0.07811199873685837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,8192,0.04390399903059006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,10240,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,16384,0.06972800195217133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,65536,0.21113599836826324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,7168,0.05161599814891815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,5120,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,4096,0.03376000002026558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3584,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2048,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3072,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1536,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2560,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1024,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,512,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,128,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,12288,0.05215999856591225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,16384,0.06464000046253204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,8192,0.05657599866390228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,7168,0.050624001771211624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,5120,0.03747199848294258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,10240,0.04966399818658829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,65536,0.20873600244522095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,4096,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3584,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2048,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2560,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1024,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1536,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3072,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,256,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,12288,0.0695360004901886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,8192,0.05020799860358238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,16384,0.06047999858856201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,5120,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,7168,0.043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,65536,0.16105599701404572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,10240,0.05939200147986412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,4096,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3584,0.03126399964094162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3072,0.028063999488949776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2560,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2048,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1024,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1536,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,512,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,256,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,12288,0.04495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,7168,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,5120,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,10240,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,8192,0.03888000175356865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,65536,0.16828800737857819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,16384,0.05555199831724167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,4096,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3584,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3072,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2560,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1536,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1024,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2048,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,256,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,12288,0.03766399994492531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,7168,0.029023999348282814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,8192,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,65536,0.1472959965467453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,10240,0.03596799820661545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,5120,0.026079999282956123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,16384,0.04531199857592583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,4096,0.03411199897527695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3584,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2560,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1536,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2048,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1024,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,12288,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,16384,0.0453759990632534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,10240,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,8192,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,5120,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,65536,0.12284799665212631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,7168,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,4096,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3584,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3072,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2560,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2048,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1536,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,128,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,12288,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,10240,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,16384,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,8192,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,5120,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,7168,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,65536,0.09987200051546097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,4096,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3072,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2560,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3584,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2048,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1536,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,12288,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,10240,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,65536,0.08102399855852127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,8192,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,16384,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,5120,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,7168,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,4096,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3584,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3072,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1536,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,12288,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,10240,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,5120,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,8192,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,16384,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,65536,0.05603199824690819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,7168,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3584,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2560,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1536,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,12288,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,8192,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,16384,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,65536,0.04822399839758873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,10240,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,5120,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,7168,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,4096,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2560,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3072,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3584,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,65536,0.05209600180387497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,5120,0.13631999492645264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,12288,0.3097600042819977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,7168,0.18547199666500092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,8192,0.20960000157356262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,4096,0.11273600161075592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3584,0.09942399710416794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3072,0.08723200112581253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2560,0.07503999769687653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2048,0.06329599767923355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1536,0.05145600065588951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1024,0.038975998759269714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,512,0.025919999927282333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,10240,0.2622399926185608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,256,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,128,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,12288,0.08959999680519104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,10240,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,5120,0.04761600121855736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,8192,0.0713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,7168,0.062431998550891876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,16384,0.11398400366306305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,16384,0.40620800852775574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,4096,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3584,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3072,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2560,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2048,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1024,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1536,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,512,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,256,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,128,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,12288,0.0843840017914772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,7168,0.052960000932216644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,8192,0.05920000001788139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,5120,0.040192000567913055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,10240,0.07344000041484833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,4096,0.03388800099492073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,16384,0.11129599809646606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3584,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,65536,0.4231039881706238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3072,0.028031999245285988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2560,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1536,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2048,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1024,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,512,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,256,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,12288,0.08816000074148178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,8192,0.06195199862122536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,10240,0.07443200051784515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,16384,0.11315199732780457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,5120,0.047359999269247055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,7168,0.052671998739242554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,65536,0.4014720022678375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3584,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,4096,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3072,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2560,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1536,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2048,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1024,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,512,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,128,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,256,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,12288,0.05516799911856651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,5120,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,8192,0.04044799879193306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,7168,0.04403200000524521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,10240,0.04879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,16384,0.0671359971165657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,4096,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,65536,0.42022401094436646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3072,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2560,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3584,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2048,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1536,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,256,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1024,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,12288,0.05084799975156784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,16384,0.06345599889755249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,7168,0.0435199998319149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,10240,0.06083200126886368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,65536,0.21619200706481934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,8192,0.049215998500585556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,5120,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,4096,0.033695999532938004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3584,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2560,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3072,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2048,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1536,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,512,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,128,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,12288,0.07129599899053574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,5120,0.03388800099492073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,8192,0.05081599950790405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,16384,0.0904960036277771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,10240,0.06028800085186958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,7168,0.04447999969124794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,65536,0.1932159960269928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,4096,0.028831999748945236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3584,0.03017600066959858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2560,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3072,0.027615999802947044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2048,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1536,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1024,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,512,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,12288,0.04399999976158142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,10240,0.04047999903559685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,7168,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,8192,0.03465599939227104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,5120,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,16384,0.053247999399900436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,4096,0.03311999887228012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3072,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,65536,0.13996799290180206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2560,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3584,0.029952000826597214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2048,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1536,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1024,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,12288,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,10240,0.03654399886727333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,5120,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,7168,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,8192,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,65536,0.12176000326871872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,16384,0.04419200122356415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2048,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,4096,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3072,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,12288,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,10240,0.03516799956560135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,7168,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,16384,0.045184001326560974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,8192,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,5120,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,65536,0.10681600123643875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,4096,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3072,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2560,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2048,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1536,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3584,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1024,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,12288,0.035840000957250595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,10240,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,8192,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,7168,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,16384,0.04070400074124336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,65536,0.09731200337409973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3584,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1536,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,12288,0.038015998899936676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,16384,0.04652800038456917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,10240,0.03500799834728241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,5120,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,8192,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,4096,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,7168,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,65536,0.08723200112581253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3584,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3072,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1536,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,12288,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,65536,0.08044800162315369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,7168,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,5120,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,8192,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,10240,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,16384,0.03641600161790848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2560,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2048,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,12288,0.03587200120091438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,10240,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,5120,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,16384,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,8192,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,7168,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,65536,0.0708480030298233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,65536,0.08083199709653854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,12288,0.026176000013947487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,8192,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,5120,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,7168,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,10240,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,16384,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,4096,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,12288,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,8192,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,10240,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,7168,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,5120,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,65536,0.053247999399900436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,16384,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3584,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2048,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1536,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,12288,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,65536,0.049247998744249344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,10240,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,8192,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,5120,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,16384,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,7168,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,4096,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3072,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1536,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,65536,0.04217600077390671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,4096,0.1117120012640953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,5120,0.1366720050573349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,7168,0.18585599958896637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,12288,0.30294400453567505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,10240,0.2563199996948242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2560,0.0764480009675026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,16384,0.4026559889316559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3072,0.08832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1024,0.04057599976658821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1536,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,8192,0.2110079973936081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2048,0.06511999666690826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3584,0.10089600086212158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,512,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,256,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,128,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,12288,0.0870399996638298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,5120,0.05596800148487091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,4096,0.04678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,8192,0.08297599852085114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,10240,0.07372800260782242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,7168,0.05270399898290634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,16384,0.11324799805879593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3072,0.035679999738931656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2560,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3584,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1024,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2048,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1536,0.024159999564290047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,512,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,256,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,128,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,12288,0.08336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,7168,0.05244800075888634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,5120,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,10240,0.07078400254249573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,8192,0.059776000678539276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,16384,0.107744000852108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3584,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,4096,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1536,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3072,0.029023999348282814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2560,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2048,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,65536,0.42080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1024,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,256,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,512,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,12288,0.0865280032157898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,5120,0.04975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,8192,0.060095999389886856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,10240,0.07411199808120728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,7168,0.05443200096487999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,16384,0.10793600231409073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,65536,0.39638400077819824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,4096,0.04198399931192398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3072,0.03379200026392937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3584,0.03830400109291077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2560,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2048,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1536,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,512,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1024,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,256,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,12288,0.0544000007212162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,8192,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,10240,0.048608001321554184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,5120,0.040800001472234726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,7168,0.03788800165057182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,16384,0.06444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,65536,0.4214720129966736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,4096,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3072,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3584,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2048,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2560,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1536,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1024,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,512,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,256,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,128,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,12288,0.051711998879909515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,16384,0.060095999389886856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,5120,0.040863998234272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,7168,0.04451199993491173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,8192,0.050144001841545105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,10240,0.04588799923658371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,4096,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3584,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2560,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3072,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2048,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1536,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1024,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,65536,0.20582400262355804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,256,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,512,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,128,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,12288,0.06915199756622314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,65536,0.18486399948596954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,7168,0.04368000105023384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,8192,0.050016000866889954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,5120,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,10240,0.058720000088214874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,16384,0.09062399715185165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3584,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,4096,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2560,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1536,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3072,0.02848000079393387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2048,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1024,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,512,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,128,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,256,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,12288,0.043327998369932175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,8192,0.03577600046992302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,10240,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,16384,0.05411199852824211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,7168,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,5120,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,65536,0.14553600549697876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,4096,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3584,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2560,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2048,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3072,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1536,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1024,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,256,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,12288,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,5120,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,8192,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,65536,0.12246400117874146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,10240,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,7168,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,16384,0.045504000037908554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,4096,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2560,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3584,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1536,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2048,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1024,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,256,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,128,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,512,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,12288,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,10240,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,16384,0.045823998749256134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,8192,0.04275200143456459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,65536,0.10780800133943558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,5120,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,7168,0.03929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,4096,0.033440001308918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3072,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2048,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3584,0.03142400085926056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1024,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2560,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1536,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,256,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,512,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,12288,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,65536,0.10188800096511841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,16384,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,10240,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,5120,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,8192,0.03187200054526329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,7168,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3584,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3072,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1536,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1024,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,128,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,256,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,12288,0.033055998384952545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,8192,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,16384,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,65536,0.08879999816417694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,10240,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,5120,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,7168,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,4096,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3584,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2048,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,128,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,12288,0.03136000037193298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,65536,0.09551999717950821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,10240,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,7168,0.024032000452280045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,8192,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,16384,0.03587200120091438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,5120,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,4096,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3584,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3072,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1536,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2048,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1024,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,12288,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,8192,0.02627200074493885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,10240,0.027264000847935677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,16384,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,7168,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,65536,0.0719359964132309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3584,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1536,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2560,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1024,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,512,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,12288,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,65536,0.06323199719190598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,10240,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,16384,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,8192,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,5120,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,7168,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,4096,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3584,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2560,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1024,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1536,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,512,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,256,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,12288,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,65536,0.04870399832725525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,8192,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,10240,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,16384,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,5120,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,7168,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,4096,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3584,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3072,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1536,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,12288,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,65536,0.047200001776218414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,7168,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,8192,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,10240,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,16384,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,5120,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,4096,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2560,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2048,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,65536,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,5120,0.13900800049304962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,7168,0.1971520036458969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,12288,0.32230401039123535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,10240,0.26607999205589294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3584,0.10416000336408615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,4096,0.11343999952077866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3072,0.08908800035715103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2560,0.07577600330114365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2048,0.06444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1024,0.040352001786231995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1536,0.052480001002550125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,8192,0.22115199267864227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,256,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,512,0.03376000002026558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,128,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,12288,0.08723200112581253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,5120,0.04108799993991852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,8192,0.09276799857616425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,7168,0.08579199761152267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,10240,0.0740479975938797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,16384,0.1136000007390976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,16384,0.427839994430542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,4096,0.05478399991989136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3584,0.04841599985957146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3072,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2048,0.030592000111937523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2560,0.026079999282956123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1536,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1024,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,512,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,128,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,256,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,7168,0.052799999713897705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,10240,0.07062400132417679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,8192,0.05955199897289276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,12288,0.08428800106048584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,16384,0.1122559979557991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,5120,0.041152000427246094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,4096,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3584,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,65536,0.45903998613357544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3072,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2560,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2048,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1536,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1024,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,512,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,128,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,256,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,12288,0.08617600053548813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,10240,0.07427199929952621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,7168,0.05430399999022484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,5120,0.0551999993622303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,8192,0.06185600161552429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,16384,0.11363200098276138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,4096,0.04495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,65536,0.4307839870452881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3584,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3072,0.03590400144457817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2048,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2560,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1024,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1536,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,512,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,256,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,128,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,12288,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,5120,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,7168,0.047839999198913574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,8192,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,10240,0.053727999329566956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,16384,0.06735999882221222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,65536,0.4208639860153198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,4096,0.036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3072,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3584,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2048,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2560,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1536,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,512,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1024,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,256,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,12288,0.0522879995405674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,10240,0.05862399935722351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,8192,0.050592001527547836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,7168,0.04416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,5120,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,16384,0.0607680007815361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,65536,0.21644799411296844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,4096,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3584,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3072,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2048,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2560,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1536,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1024,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,256,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,512,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,128,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,12288,0.07052800059318542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,10240,0.05833600088953972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,8192,0.050912000238895416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,16384,0.09027200192213058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,5120,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,7168,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,65536,0.19324800372123718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,4096,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3072,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3584,0.03110400028526783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,65536,0.13920000195503235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2560,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2048,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1536,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,256,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1024,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,512,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,12288,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,8192,0.03577600046992302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,5120,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,4096,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,16384,0.055424001067876816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,7168,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,10240,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3584,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3072,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2560,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1536,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,512,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2048,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1024,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,128,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,12288,0.04016000032424927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,16384,0.04630399867892265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,8192,0.036896001547575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,7168,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,65536,0.11785600334405899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,10240,0.03667199984192848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,5120,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,4096,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3584,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2048,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3072,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1024,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1536,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,512,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,12288,0.0379519984126091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,7168,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,5120,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,65536,0.10784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,10240,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,8192,0.042847998440265656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,16384,0.04540799930691719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,4096,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3584,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3072,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2560,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2048,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1536,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1024,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,128,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,12288,0.0350399985909462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,10240,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,16384,0.04217600077390671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,65536,0.10153599828481674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,5120,0.025280000641942024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,7168,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,8192,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,4096,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2560,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3584,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3072,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1536,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,512,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,128,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,256,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,12288,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,65536,0.08892799913883209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,10240,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,5120,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,7168,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,8192,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,16384,0.039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,4096,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3584,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2560,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3072,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2048,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1536,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1024,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,12288,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,8192,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,16384,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,7168,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,5120,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,10240,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,65536,0.09763199836015701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,4096,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3584,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,512,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,12288,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,65536,0.07222399860620499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,16384,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,7168,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,5120,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,8192,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,10240,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,4096,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2560,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3584,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1024,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,12288,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,65536,0.0682239979505539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,10240,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,16384,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,7168,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,8192,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,5120,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,4096,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3584,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2048,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2560,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3072,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1536,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,12288,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,65536,0.04854400083422661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,8192,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,7168,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,10240,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,5120,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,16384,0.029791999608278275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,4096,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2560,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3072,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1536,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2048,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,512,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,12288,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,10240,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,16384,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,7168,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,8192,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,65536,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,5120,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,4096,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3584,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2560,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1536,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,65536,0.04639999940991402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,5120,0.11017599701881409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,4096,0.08947200328111649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,10240,0.20739200711250305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,7168,0.1488640010356903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,16384,0.3242560029029846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3072,0.07046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,8192,0.16809600591659546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3584,0.08060800284147263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2048,0.05129599943757057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1536,0.04092799872159958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2560,0.06095999851822853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,12288,0.24592000246047974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,512,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1024,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,256,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,128,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,12288,0.0721919983625412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,10240,0.05958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,4096,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,5120,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,7168,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,8192,0.04944000020623207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,16384,0.09043200314044952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3584,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3072,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1536,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2560,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2048,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1024,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,512,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,10240,0.05987200140953064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,12288,0.06931199878454208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,8192,0.04831999912858009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,5120,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,7168,0.044064000248909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,4096,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,16384,0.09004800021648407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3584,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3072,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2560,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,65536,0.3269439935684204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2048,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1536,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,512,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1024,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,128,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,256,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,12288,0.07014399766921997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,10240,0.06102399900555611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,7168,0.04399999976158142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,5120,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,65536,0.3267199993133545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,8192,0.049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,16384,0.09052799642086029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,4096,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3584,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3072,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2048,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1536,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1024,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2560,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,256,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,12288,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,5120,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,7168,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,8192,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,10240,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,65536,0.32678401470184326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,16384,0.053247999399900436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,4096,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3584,0.029791999608278275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3072,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2560,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1536,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2048,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,256,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,12288,0.0414079986512661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,8192,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,65536,0.16976000368595123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,5120,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,10240,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,7168,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,16384,0.04960000142455101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,4096,0.03254399821162224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3072,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2560,0.024032000452280045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1536,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3584,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1024,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2048,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,12288,0.06163199990987778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,10240,0.05251200124621391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,7168,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,8192,0.041919998824596405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,5120,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,16384,0.04275200143456459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,4096,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,65536,0.1504960060119629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3584,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3072,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2560,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2048,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1536,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1024,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,12288,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,5120,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,7168,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,10240,0.036031998693943024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,8192,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,16384,0.04604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,65536,0.11241599917411804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,4096,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3072,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1536,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1024,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2048,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,12288,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,10240,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,5120,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,7168,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,8192,0.02723200060427189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,16384,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,4096,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,65536,0.0966079980134964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3584,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,12288,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,10240,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,7168,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,8192,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,16384,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,5120,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,4096,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,65536,0.08601599931716919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3072,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2048,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,12288,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,5120,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,16384,0.03411199897527695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,10240,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,7168,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,8192,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,65536,0.07727999985218048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3072,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1536,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2048,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,12288,0.035840000957250595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,10240,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,16384,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,5120,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,8192,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,7168,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,65536,0.06992000341415405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,4096,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3072,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1536,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,12288,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,65536,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,8192,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,10240,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,5120,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,7168,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,16384,0.030880000442266464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,4096,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3584,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1024,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,12288,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,10240,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,8192,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,65536,0.05532800033688545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,5120,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,16384,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,7168,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,4096,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3072,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2560,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,12288,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,16384,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,65536,0.053119998425245285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,10240,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,7168,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,5120,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,8192,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3584,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2560,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,12288,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,65536,0.04825599864125252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,10240,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,8192,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,7168,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,16384,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,5120,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,4096,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3584,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1536,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,12288,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,10240,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,8192,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,65536,0.04224000126123428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,16384,0.030880000442266464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,5120,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,7168,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,4096,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3584,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3072,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2560,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,256,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,65536,0.04179200157523155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,7168,0.15007999539375305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,5120,0.10927999764680862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,4096,0.09011200070381165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,12288,0.24687999486923218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,8192,0.1666879951953888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,10240,0.2078399956226349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3584,0.07996799796819687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3072,0.07027199864387512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2048,0.051072001457214355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,512,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1536,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1024,0.031808000057935715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,16384,0.3267520070075989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2560,0.060736000537872314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,256,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,128,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,12288,0.07088000327348709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,5120,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,4096,0.03359999880194664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,7168,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,8192,0.049984000623226166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,10240,0.05894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,16384,0.08979199826717377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3584,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3072,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2560,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2048,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1536,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,512,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1024,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,12288,0.07011199742555618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,5120,0.033984001725912094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,10240,0.05859199911355972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,8192,0.0488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,16384,0.08924800157546997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,7168,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,4096,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3584,0.02627200074493885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3072,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1536,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2048,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2560,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,65536,0.3277440071105957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,512,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,256,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1024,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,12288,0.07039999961853027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,7168,0.04396799951791763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,5120,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,8192,0.048576001077890396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,65536,0.32838401198387146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,10240,0.06028800085186958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,16384,0.09011200070381165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,4096,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3584,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2560,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3072,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2048,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1536,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1024,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,512,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,256,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,12288,0.04483199864625931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,7168,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,8192,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,10240,0.038943998515605927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,5120,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,16384,0.052928000688552856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,65536,0.3272320032119751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,4096,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3584,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2048,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3072,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1536,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2560,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1024,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,512,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,12288,0.04169600084424019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,8192,0.03280000016093254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,10240,0.037408001720905304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,7168,0.03200000151991844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,16384,0.050335999578237534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,5120,0.029184000566601753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,65536,0.16860799491405487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,4096,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3584,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3072,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2048,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2560,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1536,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1024,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,512,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,12288,0.06128000095486641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,16384,0.04092799872159958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,5120,0.029152000322937965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,8192,0.04169600084424019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,7168,0.038176000118255615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,10240,0.03187200054526329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,65536,0.15043200552463531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,4096,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3584,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3072,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2560,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1536,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2048,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1024,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,12288,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,8192,0.03017600066959858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,65536,0.11238399893045425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,7168,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,5120,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,10240,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,16384,0.046879999339580536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,4096,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3072,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,12288,0.03244800120592117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,8192,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,10240,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,16384,0.037376001477241516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,7168,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,5120,0.02332800067961216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,65536,0.09462399780750275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,4096,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3584,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1536,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,128,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,12288,0.03455999866127968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,10240,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,65536,0.08745600283145905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,16384,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,8192,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,5120,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,7168,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,4096,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,12288,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,5120,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,10240,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,7168,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,8192,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,16384,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,65536,0.07718399912118912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,4096,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3584,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2048,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,12288,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,65536,0.07001599669456482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,10240,0.02879999950528145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,8192,0.025312000885605812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,7168,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,16384,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,5120,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,4096,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3584,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3072,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2048,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,12288,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,65536,0.0804160013794899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,7168,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,8192,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,10240,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,5120,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,16384,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,4096,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,12288,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,7168,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,5120,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,8192,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,10240,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,65536,0.05510399863123894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,16384,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,4096,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,12288,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,65536,0.05379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,10240,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,7168,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,16384,0.027103999629616737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,5120,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,8192,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,4096,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3584,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1024,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,12288,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,65536,0.046560000628232956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,8192,0.028063999488949776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,10240,0.03142400085926056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,16384,0.03548799827694893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,7168,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,4096,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3584,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1536,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2560,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,12288,0.03411199897527695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,65536,0.04412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,8192,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,10240,0.03110400028526783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,7168,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,5120,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,16384,0.03590400144457817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,4096,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3584,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,65536,0.043455999344587326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,4096,0.07702399790287018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,5120,0.09359999746084213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,12288,0.21116800606250763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3584,0.06851200014352798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,8192,0.14374400675296783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,7168,0.12780800461769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,16384,0.2770879864692688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,10240,0.1765120029449463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1536,0.03612799942493439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,512,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2048,0.044096000492572784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1024,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2560,0.05222399905323982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3072,0.0605119988322258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,256,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,128,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,12288,0.06272000074386597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,5120,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,4096,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,10240,0.052671998739242554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,7168,0.05190400034189224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,8192,0.05644800141453743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,16384,0.08022399991750717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3584,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3072,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2560,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2048,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,512,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1536,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1024,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,256,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,12288,0.05942400172352791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,5120,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,7168,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,8192,0.041919998824596405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,16384,0.0756480023264885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,10240,0.050175998359918594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,4096,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3584,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3072,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2048,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,65536,0.2873600125312805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2560,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1536,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1024,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,512,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,12288,0.05942400172352791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,8192,0.04185599833726883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,5120,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,10240,0.0522879995405674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,7168,0.038336001336574554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,16384,0.07574400305747986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,65536,0.2741439938545227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,4096,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3584,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3072,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2560,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2048,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1536,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1024,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,512,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,128,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,12288,0.04073600098490715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,10240,0.037567999213933945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,7168,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,5120,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,16384,0.04870399832725525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,8192,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,65536,0.28678399324417114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,4096,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1024,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2048,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2560,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1536,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,12288,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,5120,0.023231999948620796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,8192,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,10240,0.035551998764276505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,16384,0.04527999833226204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,7168,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,65536,0.14931200444698334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,4096,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2560,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1024,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1536,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,12288,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,8192,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,65536,0.12649600207805634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,10240,0.02796800062060356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,7168,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,5120,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,16384,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,4096,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3584,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2560,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,12288,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,10240,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,8192,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,65536,0.09948799759149551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,7168,0.02739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,5120,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,16384,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,4096,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3584,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,12288,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,65536,0.08240000158548355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,16384,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,5120,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,10240,0.026016000658273697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,8192,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,7168,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3584,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,12288,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,65536,0.07500799745321274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,7168,0.024831999093294144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,8192,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,10240,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,16384,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,5120,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,4096,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2560,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,12288,0.028255999088287354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,65536,0.06835199892520905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,8192,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,10240,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,5120,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,16384,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,7168,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,4096,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,12288,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,10240,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,7168,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,8192,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,65536,0.05999999865889549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,5120,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,4096,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3072,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3584,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,16384,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2560,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,12288,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,65536,0.07321599870920181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,10240,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,16384,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,7168,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,8192,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,5120,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,4096,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2048,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,12288,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,65536,0.047680001705884933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,7168,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,8192,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,10240,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,16384,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,5120,0.021663999184966087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3584,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,12288,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,8192,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,10240,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,5120,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,65536,0.04841599985957146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,7168,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,16384,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,4096,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2560,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3072,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,12288,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,65536,0.04150399938225746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,8192,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,7168,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,10240,0.029152000322937965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,5120,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,16384,0.04291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3072,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1536,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,12288,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,65536,0.05027199909090996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,8192,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,10240,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,16384,0.0427200011909008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,5120,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,7168,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,4096,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3584,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3072,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1024,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,256,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,65536,0.05097600072622299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,5120,0.08483199775218964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,4096,0.07027199864387512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,7168,0.11564800143241882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,8192,0.12950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,12288,0.18995200097560883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,10240,0.15916800498962402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3584,0.06268800050020218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3072,0.05488000065088272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2048,0.040511999279260635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1024,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1536,0.03324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2560,0.04790399968624115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,512,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,256,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,128,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,16384,0.24937599897384644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,12288,0.05564799904823303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,8192,0.05510399863123894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,5120,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,7168,0.05040000006556511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,4096,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,10240,0.04742399975657463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,16384,0.0689919963479042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3584,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3072,0.026559999212622643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2560,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1536,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2048,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1024,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,512,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,256,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,12288,0.05395200103521347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,5120,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,10240,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,7168,0.03548799827694893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,8192,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,16384,0.06870400160551071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,4096,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3584,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3072,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2560,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1536,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2048,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1024,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,65536,0.24614399671554565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,512,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,12288,0.0549440011382103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,8192,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,5120,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,7168,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,10240,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,16384,0.06889600306749344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,65536,0.24697600305080414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,4096,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3584,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3072,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2560,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2048,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1024,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1536,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,512,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,12288,0.03929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,10240,0.03654399886727333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,5120,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,7168,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,8192,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,16384,0.048895999789237976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,65536,0.2735680043697357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,4096,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3072,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2560,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3584,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2048,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,12288,0.03673600032925606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,10240,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,8192,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,5120,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,7168,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,65536,0.13014400005340576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,16384,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,4096,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3584,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2048,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1536,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,12288,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,7168,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,10240,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,5120,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,8192,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,16384,0.03619199991226196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,4096,0.01942400075495243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,65536,0.11872000247240067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2560,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3072,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2048,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,12288,0.026944000273942947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,65536,0.08947200328111649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,10240,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,7168,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,8192,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,5120,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,16384,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,4096,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3584,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,12288,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,10240,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,5120,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,7168,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,16384,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,65536,0.0764480009675026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,8192,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,4096,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,12288,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,10240,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,5120,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,65536,0.06780800223350525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,8192,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,7168,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,16384,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3584,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2048,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,12288,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,8192,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,16384,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,65536,0.062144000083208084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,5120,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,7168,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,10240,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3072,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3584,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2560,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,12288,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,65536,0.05452800169587135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,10240,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,7168,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,16384,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,5120,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,8192,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2560,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,12288,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,65536,0.0496320016682148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,8192,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,10240,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,7168,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,5120,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,16384,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,4096,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1536,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,12288,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,8192,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,65536,0.04371200129389763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,7168,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,10240,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,5120,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,16384,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,4096,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3072,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1024,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,12288,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,16384,0.04249599948525429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,65536,0.041919998824596405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,8192,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,7168,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,10240,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,5120,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,4096,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3584,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,12288,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,65536,0.050655998289585114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,10240,0.029152000322937965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,16384,0.04249599948525429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,8192,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,7168,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,5120,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,4096,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3584,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1536,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2048,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2560,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,256,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,65536,0.07782399654388428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,12288,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,8192,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,16384,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,7168,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,10240,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,5120,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,4096,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3584,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3072,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2560,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,65536,0.07657600194215775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,5120,0.08511999994516373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,4096,0.07027199864387512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,7168,0.11529599875211716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,12288,0.18937599658966064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3584,0.06239999830722809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,8192,0.12995199859142303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,10240,0.1586879938840866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2048,0.04064000025391579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3072,0.05503999814391136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2560,0.04790399968624115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,16384,0.24835200607776642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1536,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1024,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,256,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,512,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,128,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,12288,0.05385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,4096,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,8192,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,7168,0.049215998500585556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,5120,0.027264000847935677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,10240,0.045951999723911285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,16384,0.06815999746322632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3584,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3072,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2560,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2048,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1536,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1024,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,512,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,12288,0.053888000547885895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,7168,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,8192,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,5120,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,10240,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,16384,0.06764800101518631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3584,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2560,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,4096,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3072,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2048,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1536,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,512,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1024,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,65536,0.24643200635910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,12288,0.05395200103521347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,7168,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,8192,0.03811199963092804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,5120,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,10240,0.045504000037908554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,16384,0.06790400296449661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,65536,0.24719999730587006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,4096,0.023231999948620796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3584,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2048,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3072,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1536,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2560,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1024,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,512,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,12288,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,8192,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,10240,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,5120,0.02332800067961216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,7168,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,65536,0.2733759880065918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,16384,0.04604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3072,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2560,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2048,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1536,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,12288,0.03686400130391121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,10240,0.03324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,8192,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,5120,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,16384,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,7168,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,65536,0.12966400384902954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3072,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2048,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3584,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2560,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,12288,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,10240,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,7168,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,5120,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,8192,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,16384,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,65536,0.11881600320339203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,4096,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3072,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2048,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,12288,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,10240,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,7168,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,5120,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,16384,0.031007999554276466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,65536,0.09055999666452408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,4096,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,8192,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3584,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3072,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1536,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1024,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,128,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,12288,0.026208000257611275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,10240,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,5120,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,8192,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,7168,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,65536,0.0761599987745285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,16384,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,4096,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3584,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3072,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2048,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2560,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,12288,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,5120,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,10240,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,8192,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,65536,0.06681600213050842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,16384,0.02848000079393387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,7168,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,4096,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3584,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3072,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2560,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2048,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,512,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,256,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,12288,0.02505600079894066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,8192,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,65536,0.06095999851822853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,7168,0.019392000511288643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,10240,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,16384,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,5120,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,4096,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3584,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2560,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3072,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2048,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,12288,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,65536,0.053568001836538315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,10240,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,7168,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,16384,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,5120,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,8192,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,4096,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3584,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2560,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3072,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2048,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1536,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1024,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,12288,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,65536,0.048576001077890396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,10240,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,16384,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,8192,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,7168,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,5120,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,4096,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3584,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1536,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3072,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2048,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2560,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1024,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,12288,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,10240,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,8192,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,7168,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,65536,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,16384,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,5120,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,4096,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3584,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2560,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1536,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1024,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2048,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3072,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,256,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,12288,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,8192,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,16384,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,10240,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,65536,0.048448000103235245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,7168,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,5120,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,4096,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3584,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3072,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2048,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2560,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1024,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1536,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,256,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,12288,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,65536,0.04521600157022476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,8192,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,10240,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,5120,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,7168,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,16384,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,4096,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3584,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3072,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2560,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2048,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1536,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1024,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,512,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,256,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,12288,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,7168,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,16384,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,8192,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,10240,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,5120,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,65536,0.046560000628232956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,4096,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3584,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3072,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2560,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1536,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2048,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1024,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,512,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,256,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,65536,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,4096,0.07017599791288376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,5120,0.08486399799585342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,7168,0.11452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,8192,0.12992000579833984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,12288,0.18911999464035034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3584,0.06239999830722809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,10240,0.15878400206565857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3072,0.054816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2560,0.04761600121855736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2048,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,512,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1024,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,16384,0.24879999458789825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1536,0.03254399821162224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,256,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,128,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,12288,0.05423999950289726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,4096,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,7168,0.03500799834728241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,8192,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,5120,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,10240,0.04572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,16384,0.06931199878454208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3072,0.026815999299287796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3584,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2048,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1536,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2560,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,128,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,12288,0.04867200180888176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,8192,0.035551998764276505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,7168,0.033952001482248306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,5120,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,10240,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,16384,0.060095999389886856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,4096,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3584,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3072,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2560,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2048,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,65536,0.2465600073337555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,512,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,12288,0.04156799986958504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,8192,0.0307839997112751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,7168,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,5120,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,16384,0.05251200124621391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,10240,0.03766399994492531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,65536,0.20175999402999878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3584,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3072,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2048,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1536,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2560,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,512,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,128,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,12288,0.03551999852061272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,10240,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,7168,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,5120,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,8192,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,16384,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3584,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,4096,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,65536,0.17023999989032745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3072,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2560,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2048,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,128,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,12288,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,16384,0.038816001266241074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,8192,0.024159999564290047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,10240,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,7168,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,5120,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,4096,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,65536,0.13087999820709229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3584,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3072,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2560,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1536,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2048,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1024,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,12288,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,8192,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,5120,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,7168,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,10240,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,65536,0.11695999652147293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,16384,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,4096,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3584,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2560,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3072,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2048,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1536,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,65536,0.08835200220346451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,512,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,12288,0.02223999984562397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,8192,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,10240,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,4096,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,5120,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,7168,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,16384,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3584,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3072,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2560,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2048,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1536,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,512,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,12288,0.02191999927163124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,7168,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,16384,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,10240,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,5120,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,65536,0.0751039981842041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,8192,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,4096,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3072,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3584,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2560,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2048,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1024,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,12288,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,8192,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,16384,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,7168,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,5120,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,10240,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,65536,0.06700800359249115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,4096,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3584,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3072,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2560,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2048,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1024,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1536,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,512,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,256,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,12288,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,8192,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,16384,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,10240,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,5120,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,7168,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,65536,0.06147199869155884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,4096,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3584,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3072,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2048,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2560,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1536,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1024,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,512,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,256,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,128,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,12288,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,65536,0.05407999828457832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,10240,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,5120,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,7168,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,16384,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,4096,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,8192,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3584,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3072,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2560,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2048,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1536,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1024,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,256,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,512,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,12288,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,65536,0.047040000557899475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,10240,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,8192,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,16384,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,7168,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,5120,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3584,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2560,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,4096,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2048,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1536,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3072,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1024,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,512,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,256,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,128,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,12288,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,65536,0.04339199885725975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,10240,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,8192,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,16384,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,7168,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,5120,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,4096,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3584,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1536,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2560,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2048,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3072,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1024,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,512,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,256,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,12288,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,8192,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,65536,0.038656000047922134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,10240,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,16384,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,5120,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,7168,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,4096,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3584,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3072,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2048,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1536,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2560,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1024,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,512,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,256,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,12288,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,65536,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,10240,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,8192,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,16384,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,5120,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,7168,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3584,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,4096,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2048,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3072,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2560,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1536,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1024,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,512,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,128,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,256,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,12288,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,10240,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,16384,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,65536,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,8192,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,7168,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,5120,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,4096,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3584,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2560,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1536,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3072,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2048,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,512,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,256,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,128,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,65536,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,5120,0.07395199686288834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,4096,0.056352000683546066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,7168,0.08575999736785889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,8192,0.09283199906349182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,10240,0.11539199948310852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,12288,0.1319040060043335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3584,0.05462399870157242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2048,0.040383998304605484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2560,0.047520000487565994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3072,0.04956800118088722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1024,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,256,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,16384,0.1716800034046173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,512,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1536,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,128,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,12288,0.04076800122857094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,5120,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,8192,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,4096,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,7168,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,10240,0.036479998379945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,16384,0.050464000552892685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2560,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3584,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2048,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3072,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,512,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,12288,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,8192,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,10240,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,5120,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,7168,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,16384,0.04054399952292442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,4096,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3584,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2560,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3072,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2048,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1536,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1024,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,512,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,65536,0.16700799763202667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,12288,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,8192,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,5120,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,7168,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,10240,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,16384,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,65536,0.12931199371814728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3584,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,4096,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3072,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2560,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2048,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,256,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,12288,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,16384,0.028896000236272812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,5120,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,8192,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,7168,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,10240,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,65536,0.1098560020327568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,4096,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2560,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3584,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3072,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2048,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1536,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1024,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,12288,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,10240,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,65536,0.08991999924182892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,7168,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,5120,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,16384,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,8192,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,4096,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3584,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3072,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1536,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2560,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2048,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1024,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,12288,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,8192,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,7168,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,10240,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,16384,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,5120,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,4096,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,65536,0.08083199709653854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3072,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3584,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2048,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1536,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2560,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1024,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,12288,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,5120,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,7168,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,8192,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,10240,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,16384,0.01881599985063076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,65536,0.07039999961853027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,4096,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3584,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3072,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2048,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2560,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1536,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1024,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,512,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,128,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,256,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,12288,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,65536,0.04947200044989586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,10240,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,5120,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,8192,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,7168,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,16384,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,4096,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3584,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3072,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2560,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1536,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2048,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1024,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,512,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,256,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,128,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,12288,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,16384,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,8192,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,10240,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,5120,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,7168,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,65536,0.050175998359918594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3584,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,4096,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3072,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1536,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1024,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2048,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2560,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,512,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,128,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,256,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,12288,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,10240,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,8192,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,16384,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,65536,0.04217600077390671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,7168,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,5120,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,4096,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3584,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3072,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2048,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1536,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2560,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1024,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,512,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,256,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,128,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,12288,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,8192,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,10240,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,65536,0.040832001715898514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,5120,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,16384,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,7168,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,4096,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3584,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2048,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2560,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1536,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3072,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1024,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,512,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,128,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,256,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,12288,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,8192,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,10240,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,65536,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,16384,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,7168,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,5120,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,4096,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3584,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2048,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2560,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1536,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1024,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3072,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,512,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,256,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,128,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,12288,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,65536,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,10240,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,16384,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,5120,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,7168,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,8192,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,4096,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3584,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3072,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2560,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2048,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1024,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1536,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,512,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,256,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,128,0.006688000168651342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,12288,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,65536,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,7168,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,10240,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,8192,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,16384,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,5120,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,4096,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3584,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3072,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2560,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1536,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2048,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1024,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,512,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,256,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,128,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,12288,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,65536,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,10240,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,16384,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,5120,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,8192,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,7168,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,4096,0.007296000141650438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3584,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2560,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3072,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2048,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1024,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1536,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,512,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,256,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,128,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,12288,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,65536,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,7168,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,8192,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,10240,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,16384,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,5120,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,4096,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3072,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3584,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1536,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2560,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2048,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1024,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,512,0.007040000054985285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,256,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,128,0.0066559999249875546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,65536,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,4096,0.044544000178575516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,5120,0.059487998485565186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,7168,0.0689919963479042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,8192,0.07583999633789062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,12288,0.10649599879980087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,10240,0.09657599776983261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3584,0.0424639992415905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2560,0.036639999598264694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2048,0.03324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1536,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3072,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,16384,0.14032000303268433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1024,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,512,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,256,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,128,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,12288,0.03308799862861633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,4096,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,5120,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,8192,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,10240,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,7168,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,16384,0.04108799993991852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3072,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3584,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2560,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2048,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,512,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,256,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,128,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,12288,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,8192,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,10240,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,5120,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,7168,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,16384,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,4096,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3584,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2560,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2048,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1536,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3072,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,65536,0.139615997672081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,256,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,12288,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,5120,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,7168,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,8192,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,10240,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,16384,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,4096,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,65536,0.12118399888277054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3584,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3072,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2560,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2048,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1536,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1024,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,256,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,12288,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,8192,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,5120,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,16384,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,10240,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,7168,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,4096,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,65536,0.1117440015077591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3584,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3072,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2560,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2048,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1536,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,512,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1024,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,12288,0.019200000911951065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,7168,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,8192,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,10240,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,5120,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,16384,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,65536,0.07472000271081924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,4096,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3584,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2560,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3072,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2048,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1536,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1024,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,512,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,256,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,12288,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,8192,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,10240,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,7168,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,5120,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,65536,0.06921599805355072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,16384,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,4096,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3584,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3072,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2560,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2048,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1536,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1024,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,512,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,12288,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,16384,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,8192,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,10240,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,5120,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,7168,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,4096,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,65536,0.05769599974155426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3584,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3072,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2048,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1536,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2560,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1024,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,512,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,256,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,12288,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,65536,0.05315199866890907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,8192,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,10240,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,7168,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,5120,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,16384,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,4096,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3584,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2048,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2560,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3072,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1024,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1536,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,512,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,256,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,12288,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,10240,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,7168,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,5120,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,16384,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,8192,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,65536,0.052000001072883606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,4096,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3584,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3072,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2048,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2560,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1024,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1536,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,512,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,256,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,128,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,12288,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,10240,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,8192,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,16384,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,5120,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,7168,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,65536,0.049247998744249344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,4096,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2560,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2048,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3072,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1024,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1536,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3584,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,512,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,256,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,8192,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,12288,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,10240,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,16384,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,5120,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,7168,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,65536,0.049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,4096,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2048,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1536,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2560,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3072,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3584,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1024,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,512,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,256,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,12288,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,65536,0.04623999819159508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,10240,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,8192,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,7168,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,5120,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,16384,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,4096,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3584,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2048,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1536,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2560,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3072,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1024,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,512,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,256,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,128,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,12288,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,8192,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,65536,0.042847998440265656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,10240,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,5120,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,16384,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,7168,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3584,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,4096,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2048,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3072,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1536,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1024,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2560,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,512,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,12288,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,65536,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,10240,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,16384,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,8192,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,5120,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,7168,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,4096,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3584,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3072,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2048,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2560,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1536,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,512,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,256,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,128,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,12288,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,8192,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,65536,0.04281599819660187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,7168,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,10240,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,16384,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,5120,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,4096,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2560,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3072,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3584,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2048,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1536,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1024,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,512,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,256,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,128,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,12288,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,65536,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,8192,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,10240,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,5120,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,16384,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,7168,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,4096,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3584,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3072,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2048,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2560,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1536,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,512,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,256,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,128,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,65536,0.04303999990224838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3584,4.014400005340576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,4096,4.504928112030029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,5120,5.4976959228515625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2560,3.0167999267578125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3072,3.5086400508880615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,7168,7.512928009033203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2048,2.520416021347046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,256,0.7594559788703918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,8192,8.476832389831543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,512,1.0319039821624756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1024,1.5483200550079346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,128,0.5874559879302979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1536,2.0348479747772217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,10240,10.457695960998535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,8192,2.1765758991241455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,10240,2.680896043777466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,12288,12.464320182800293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,12288,3.193056106567383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,7168,1.9249919652938843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,5120,1.417855978012085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,16384,4.193759918212891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,4096,1.1632640361785889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3072,0.9077759981155396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2560,0.7797120213508606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3584,1.035871982574463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,128,0.1531199961900711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,512,0.26892799139022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,256,0.19619199633598328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1024,0.3917120099067688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1536,0.5278080105781555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2048,0.6529920101165771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,7168,1.4468159675598145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,8192,1.6325119733810425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,10240,2.0122880935668945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,12288,2.3910720348358154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,16384,16.362112045288086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,5120,1.0662720203399658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,4096,0.8750720024108887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3584,0.7811840176582336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2560,0.5886399745941162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3072,0.684224009513855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1536,0.401856005191803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2048,0.4934079945087433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,16384,3.1473278999328613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1024,0.3030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,512,0.20416000485420227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,128,0.11823999881744385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,256,0.1491200029850006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,7168,1.2065279483795166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,8192,1.3619840145111084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,10240,1.6788159608840942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,12288,1.9933439493179321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,5120,0.8903679847717285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,4096,0.730239987373352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3584,0.6528319716453552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2048,0.41363200545310974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3072,0.5715519785881042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2560,0.49187201261520386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,16384,2.62390398979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1536,0.33401599526405334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,512,0.17235200107097626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,256,0.12636800110340118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,128,0.09935999661684036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1024,0.25385600328445435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,8192,1.0902719497680664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,10240,1.3436800241470337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,12288,1.5958399772644043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,16384,2.09932804107666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,5120,0.7137920260429382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,7168,0.9666560292243958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,4096,0.5846719741821289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2560,0.39478400349617004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2048,0.33110401034355164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3072,0.4588800072669983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3584,0.5222399830818176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,512,0.1390720009803772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,256,0.10156799852848053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1024,0.20412799715995789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1536,0.26892799139022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,128,0.08064000308513641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,10240,1.1784000396728516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,12288,1.3986560106277466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,16384,1.8397760391235352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,8192,0.955456018447876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,7168,0.8461440205574036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,5120,0.6241599917411804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,4096,0.5138239860534668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3584,0.46009600162506104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3072,0.40252798795700073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2048,0.2916800081729889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,65536,18.08755111694336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2560,0.3468160033226013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1536,0.23600000143051147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,256,0.09068799763917923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,512,0.12195199728012085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1024,0.18031999468803406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,128,0.07222399860620499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,65536,14.201791763305664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,10240,0.8428800106048584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,12288,1.0086079835891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,8192,0.6840639710426331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,16384,1.315999984741211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,5120,0.44867199659347534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,65536,11.568415641784668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,7168,0.6072959899902344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,4096,0.3688639998435974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3584,0.3302080035209656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1536,0.16991999745368958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2048,0.21033599972724915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2560,0.2513920068740845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3072,0.29068800806999207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,512,0.08908800035715103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,256,0.0663359984755516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,128,0.05427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1024,0.13020800054073334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,65536,9.319552421569824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,8192,0.5479679703712463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,10240,0.6769279837608337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,12288,0.8014079928398132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,7168,0.4859839975833893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,5120,0.3606719970703125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,4096,0.2959359884262085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,16384,1.053663969039917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3584,0.2645759880542755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3072,0.23286400735378265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2560,0.20112000405788422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,512,0.0719040036201477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2048,0.16927999258041382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1024,0.10527999699115753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1536,0.13699199259281158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,128,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,256,0.055456001311540604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,8192,0.4814079999923706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,65536,8.128128051757812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,10240,0.592415988445282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,12288,0.7077119946479797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,4096,0.25964799523353577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,16384,0.9234880208969116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3584,0.23296000063419342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,5120,0.31619200110435486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,7168,0.427264004945755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1024,0.09318400174379349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1536,0.12131199985742569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2048,0.14883199334144592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2560,0.17740799486637115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3072,0.20476800203323364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,256,0.04944000020623207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,512,0.06483200192451477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,128,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,8192,0.41337600350379944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,10240,0.5087040066719055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,12288,0.6036800146102905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,65536,5.762303829193115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,16384,0.7925440073013306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,7168,0.3664320111274719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,4096,0.22431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,5120,0.27209600806236267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3584,0.2006720006465912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1536,0.10460799932479858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3072,0.1762239933013916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2048,0.12848000228405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1024,0.08035200089216232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2560,0.15244799852371216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,128,0.035392001271247864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,512,0.056223999708890915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,256,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,8192,0.3498240113258362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,10240,0.4272960126399994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,65536,4.697535991668701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,12288,0.5050879716873169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,16384,0.6627200245857239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,4096,0.1887039989233017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3584,0.16832000017166138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,5120,0.2279680073261261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,7168,0.3068479895591736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1024,0.06876800209283829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1536,0.08828800171613693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2048,0.10835199803113937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2560,0.12848000228405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3072,0.1483840048313141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,512,0.04800000041723251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,256,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,128,0.0307839997112751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,8192,0.2778240144252777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,65536,4.116896152496338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,10240,0.34137600660324097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,12288,0.4041920006275177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,5120,0.18355199694633484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,4096,0.151296004652977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,16384,0.5292479991912842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3584,0.13574400544166565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,7168,0.24643200635910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1024,0.05584000051021576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1536,0.0716480016708374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2048,0.08742400258779526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3072,0.1204800009727478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2560,0.10489600151777267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,512,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,256,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,128,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,8192,0.20988799631595612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,65536,3.5589120388031006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,10240,0.2580159902572632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,12288,0.30460798740386963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,16384,0.39948800206184387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,5120,0.1393599957227707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,4096,0.11503999680280685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3584,0.10310400277376175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,7168,0.18649600446224213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2048,0.06790400296449661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1536,0.05552000179886818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1024,0.043007999658584595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2560,0.07999999821186066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3072,0.09171199798583984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,256,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,128,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,512,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,65536,2.9123520851135254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,8192,0.14150400459766388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,10240,0.1735360026359558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,12288,0.20492799580097198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,5120,0.09455999732017517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,16384,0.26822400093078613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,4096,0.07862400263547897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3584,0.07056000083684921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,7168,0.1265919953584671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2048,0.047488000243902206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1024,0.031007999554276466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3072,0.06300800293684006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1536,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2560,0.05478399991989136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,256,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,128,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,512,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,8192,0.07395199686288834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,65536,2.3975040912628174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,10240,0.09046400338411331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,12288,0.1056319996714592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,16384,0.13708800077438354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,5120,0.05052800104022026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,4096,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3584,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,7168,0.06639999896287918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2048,0.027103999629616737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1536,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2560,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3072,0.036928001791238785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1024,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,256,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,128,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,512,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,65536,1.7829439640045166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,8192,0.054016001522541046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,10240,0.06576000154018402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,12288,0.07619199901819229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,16384,0.09862399846315384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,5120,0.03683200106024742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,7168,0.0488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,4096,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3584,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2560,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2048,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1024,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1536,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3072,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,128,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,256,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,512,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,65536,1.2136319875717163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,8192,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,10240,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,12288,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,7168,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,16384,0.05817599967122078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,4096,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3584,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3072,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2560,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1536,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,65536,0.616320013999939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,65536,0.42339199781417847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,65536,0.2439039945602417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,5120,2.790208101272583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,4096,2.2830400466918945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,7168,3.784991979598999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,8192,4.274816036224365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3072,1.780735969543457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3584,2.03110408782959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2048,1.2637120485305786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2560,1.5179840326309204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,512,0.5145279765129089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,10240,5.283135890960693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1024,0.7599679827690125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1536,1.011423945426941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,128,0.30031999945640564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,256,0.3880639970302582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,12288,6.2682881355285645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,7168,0.9658560156822205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,8192,1.0901440382003784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,10240,1.3441599607467651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,5120,0.7134720087051392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,12288,1.6067520380020142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,4096,0.5856959819793701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3584,0.5224639773368835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3072,0.4613119959831238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2048,0.3266240060329437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2560,0.3930239975452423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1024,0.2006399929523468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1536,0.26204800605773926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,512,0.1361600011587143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,256,0.10300800204277039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,128,0.08204799890518188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,16384,2.100032091140747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,16384,8.250528335571289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,7168,0.7271680235862732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,8192,0.8197119832038879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,10240,1.0102399587631226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,5120,0.5382720232009888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,12288,1.2023680210113525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3584,0.3960320055484772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,4096,0.4415999948978424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2560,0.2975679934024811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3072,0.3453119993209839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2048,0.248416006565094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1024,0.1528960019350052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1536,0.20022399723529816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,256,0.0790719985961914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,512,0.10502400249242783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,128,0.06329599767923355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,16384,1.576416015625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,7168,0.6062719821929932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,8192,0.6840320229530334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,10240,0.8429120182991028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,12288,0.9996479749679565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,4096,0.368831992149353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,5120,0.44889599084854126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3584,0.3295679986476898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3072,0.28908801078796387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,16384,1.3156479597091675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2048,0.20735999941825867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2560,0.24991999566555023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1536,0.1687999963760376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1024,0.12831999361515045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,512,0.08892799913883209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,128,0.05427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,256,0.06758400052785873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,8192,0.5479040145874023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,10240,0.6751360297203064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,12288,0.8069440126419067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,16384,1.0526399612426758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,4096,0.29820799827575684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,7168,0.4872319996356964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,5120,0.35897600650787354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2048,0.16735999286174774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2560,0.20006400346755981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3584,0.26441600918769836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3072,0.2324800044298172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,512,0.07171200215816498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,256,0.0570559985935688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1024,0.1043199971318245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1536,0.1350719928741455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,128,0.045471999794244766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,10240,0.5919039845466614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,12288,0.7019839882850647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,16384,0.9222400188446045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,7168,0.42668798565864563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,8192,0.4805760085582733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,5120,0.3155519962310791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,65536,8.366016387939453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3584,0.23254400491714478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,4096,0.25948798656463623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3072,0.20393599569797516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1536,0.11935999989509583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2048,0.14847999811172485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,65536,6.445119857788086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2560,0.1767359972000122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1024,0.09190399944782257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,256,0.04979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,512,0.06374400109052658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,128,0.040800001472234726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,8192,0.3463680148124695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,65536,5.319424152374268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,10240,0.424703985452652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,12288,0.5033919811248779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,4096,0.1881600022315979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,5120,0.2274239957332611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,16384,0.6609920263290405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3584,0.16767999529838562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,7168,0.3079040050506592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1536,0.08774399757385254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2048,0.1080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1024,0.06864000111818314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2560,0.12806400656700134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3072,0.14787200093269348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,512,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,128,0.03200000151991844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,256,0.03843199834227562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,65536,4.082592010498047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,8192,0.27747198939323425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,7168,0.24620799720287323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,10240,0.34307199716567993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,12288,0.4041599929332733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,4096,0.15087999403476715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,5120,0.18323199450969696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3584,0.13526399433612823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,16384,0.5294719934463501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3072,0.11971200257539749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1024,0.05539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2560,0.10339199751615524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,512,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2048,0.08745600283145905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1536,0.07148800045251846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,128,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,256,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,65536,3.896512031555176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,8192,0.2770879864692688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,12288,0.40380799770355225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,10240,0.34031999111175537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,7168,0.2460159957408905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,5120,0.18387199938297272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,16384,0.529312014579773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3584,0.13523200154304504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,4096,0.15113599598407745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3072,0.11952000111341476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2048,0.08716800063848495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1536,0.07135999947786331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1024,0.05539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2560,0.10345599800348282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,128,0.02470399998128414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,512,0.03791999816894531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,256,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,65536,2.7581119537353516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,8192,0.21068799495697021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,10240,0.25884801149368286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,12288,0.30460798740386963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,5120,0.13948799669742584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,16384,0.3994239866733551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,4096,0.11558400094509125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3584,0.10313600301742554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,7168,0.18716800212860107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2560,0.07971200346946716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2048,0.0679360032081604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3072,0.09116800129413605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1536,0.05523199960589409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1024,0.043296001851558685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,256,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,128,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,512,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,65536,2.0450239181518555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,8192,0.2101760059595108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,10240,0.25760000944137573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,12288,0.30646398663520813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,7168,0.18716800212860107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,16384,0.39932799339294434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,5120,0.13900800049304962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,4096,0.11513599753379822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3584,0.10294400155544281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3072,0.09151999652385712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2048,0.06735999882221222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2560,0.07916799932718277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1024,0.042527999728918076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1536,0.054687999188899994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,512,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,128,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,256,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,65536,2.1690878868103027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,8192,0.14176000654697418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,10240,0.17430399358272552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,12288,0.20499199628829956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,7168,0.12627199292182922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,16384,0.2672959864139557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,5120,0.09567999839782715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,4096,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3584,0.07103999704122543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3072,0.06284800171852112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2560,0.05564799904823303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2048,0.046911999583244324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,512,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1536,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1024,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,128,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,256,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,65536,1.7255680561065674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,8192,0.1422400027513504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,12288,0.20559999346733093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,7168,0.12614400684833527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,10240,0.17395199835300446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,16384,0.2677760124206543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,5120,0.09408000111579895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3072,0.060447998344898224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,4096,0.07676800340414047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3584,0.0692799985408783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2560,0.05244800075888634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,512,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1024,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1536,0.03651199862360954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2048,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,256,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,128,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,65536,1.587488055229187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,8192,0.07391999661922455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,12288,0.10531199723482132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,7168,0.06630399823188782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,10240,0.08975999802350998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,16384,0.1372479945421219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,4096,0.04460800066590309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3072,0.034591998904943466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3584,0.03859199956059456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,5120,0.05040000006556511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1024,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,512,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2560,0.031007999554276466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2048,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1536,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,256,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,128,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,65536,1.1604479551315308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,7168,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,8192,0.05315199866890907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,12288,0.0761599987745285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,10240,0.06543999910354614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,16384,0.09785600006580353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3584,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,4096,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,5120,0.042399998754262924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3072,0.02755199931561947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,512,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1024,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2560,0.02332800067961216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1536,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2048,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,256,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,65536,1.1472959518432617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,8192,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,7168,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,12288,0.04495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,10240,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,16384,0.05686400085687637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,5120,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1024,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1536,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2048,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,65536,0.5830079913139343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,7168,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,8192,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,10240,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,12288,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,4096,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,16384,0.04028800129890442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,5120,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3584,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,65536,0.3645760118961334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,512,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2048,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2560,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1024,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1536,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,65536,0.20364800095558167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,65536,0.12252800166606903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,5120,1.4179840087890625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,4096,1.158784031867981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,7168,1.9276479482650757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,8192,2.1774399280548096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3072,0.9026240110397339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3584,1.0297280550003052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2560,0.7720959782600403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2048,0.6464639902114868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,10240,2.6856958866119385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1536,0.5174400210380554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,512,0.2613759934902191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,256,0.20006400346755981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1024,0.38815999031066895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,128,0.155008003115654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,12288,3.196928024291992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,7168,0.48713600635528564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,8192,0.5501760244369507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,10240,0.6752960085868835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,5120,0.35945600271224976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,12288,0.8010240197181702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,4096,0.29583999514579773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3584,0.2650879919528961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2560,0.2006080001592636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2048,0.16988800466060638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3072,0.23286400735378265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,16384,1.054975986480713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,256,0.0570559985935688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1536,0.13596799969673157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,512,0.07308799773454666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,128,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1024,0.10422399640083313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,16384,4.1943039894104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,7168,0.3667199909687042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,8192,0.4131839871406555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,5120,0.2728959918022156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,10240,0.5084159970283508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,12288,0.6026880145072937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,4096,0.22351999580860138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3584,0.20025600492954254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3072,0.17721599340438843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2560,0.15273599326610565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2048,0.12950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,512,0.057440001517534256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1024,0.08073599636554718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,256,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,16384,0.7909759879112244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1536,0.10502400249242783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,128,0.036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,7168,0.3080640137195587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,8192,0.3457599878311157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,10240,0.4256319999694824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,12288,0.503711998462677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,4096,0.18726399540901184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,5120,0.22732800245285034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3584,0.16809600591659546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3072,0.14815999567508698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,16384,0.6603839993476868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1536,0.08832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2560,0.12905600666999817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,512,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2048,0.10864000022411346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1024,0.06790400296449661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,256,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,128,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,8192,0.2784639894962311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,12288,0.4039359986782074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,10240,0.34246399998664856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,16384,0.5303040146827698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,4096,0.1517760008573532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,5120,0.1831039935350418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,7168,0.24739199876785278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3584,0.13635200262069702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2560,0.10406400263309479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3072,0.11974400281906128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2048,0.08841600269079208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,512,0.04028800129890442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1536,0.07177600264549255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1024,0.05657599866390228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,256,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,128,0.02723200060427189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,10240,0.34064000844955444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,12288,0.40348801016807556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,65536,4.081984043121338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,16384,0.5292479991912842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,65536,3.0758399963378906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,5120,0.1847040057182312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,7168,0.24643200635910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,8192,0.2773759961128235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,4096,0.15107199549674988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3584,0.13619199395179749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3072,0.11964800208806992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2048,0.08755200356245041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2560,0.10396800190210342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1536,0.07116799801588058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1024,0.05558399856090546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,256,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,512,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,128,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,65536,2.553663969039917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,8192,0.21030400693416595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,7168,0.18627199530601501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,10240,0.2603839933872223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,12288,0.3043839931488037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,16384,0.4000000059604645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,5120,0.13948799669742584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3072,0.09164799749851227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3584,0.10380800068378448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,4096,0.1151999980211258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,65536,2.04367995262146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,512,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,256,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1536,0.05539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1024,0.04275200143456459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2560,0.07932800054550171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2048,0.06755200028419495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,128,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,8192,0.14342400431632996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,7168,0.12671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,10240,0.1757120043039322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,12288,0.20521600544452667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,16384,0.2682879865169525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,5120,0.09567999839782715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3584,0.07119999825954437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,4096,0.07884799689054489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3072,0.06345599889755249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,512,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1024,0.030848000198602676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2560,0.05632000043988228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1536,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2048,0.0480320006608963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,256,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,128,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,65536,2.0437440872192383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,12288,0.20550400018692017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,8192,0.14195199310779572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,10240,0.17430399358272552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,7168,0.12627199292182922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,16384,0.2690559923648834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,4096,0.07846400141716003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,5120,0.09459199756383896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3072,0.06278400123119354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3584,0.07088000327348709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2560,0.05555199831724167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2048,0.047168001532554626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,512,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1024,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1536,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,256,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,128,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,65536,1.534432053565979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,7168,0.12572799623012543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,8192,0.1425279974937439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,10240,0.17404800653457642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,12288,0.2048639953136444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,16384,0.267520010471344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3072,0.06124800071120262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3584,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,4096,0.0767040029168129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,5120,0.09312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,65536,1.0250240564346313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,512,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2560,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1024,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1536,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2048,0.04479999840259552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,256,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,128,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,7168,0.10739199817180634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,8192,0.12108799815177917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,12288,0.17420800030231476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,10240,0.14732800424098969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,16384,0.23267200589179993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,4096,0.06796800345182419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3072,0.054048001766204834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,5120,0.0796160027384758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3584,0.061664000153541565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,65536,1.0274560451507568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,512,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1024,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2048,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,256,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1536,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2560,0.04668800160288811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,128,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,8192,0.07507199794054031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,7168,0.06672000139951706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,12288,0.10543999820947647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,10240,0.09027200192213058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,16384,0.1372479945421219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3584,0.03884800150990486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,5120,0.05049600079655647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,4096,0.0427200011909008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3072,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1024,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2560,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,65536,1.0250240564346313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2048,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,512,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1536,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,256,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,128,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,10240,0.08985599875450134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,12288,0.10608000308275223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,7168,0.06646399945020676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,8192,0.07427199929952621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,16384,0.13657599687576294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3584,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3072,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,4096,0.04211200028657913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,5120,0.050144001841545105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2560,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,65536,0.9079040288925171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2048,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1024,0.01708799973130226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1536,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,512,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,256,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,128,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,8192,0.0634239986538887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,12288,0.07619199901819229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,10240,0.06531199812889099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,7168,0.04806400090456009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,16384,0.0976639986038208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,65536,0.5155199766159058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3072,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3584,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,5120,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,4096,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,512,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,256,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2048,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1024,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2560,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1536,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,8192,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,7168,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,12288,0.045024000108242035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,10240,0.0387520007789135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,65536,0.5154880285263062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,16384,0.05686400085687637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,5120,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3072,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,4096,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,8192,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,7168,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,12288,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,10240,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,65536,0.3633919954299927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,16384,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3072,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3584,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,4096,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,5120,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2048,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2560,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1024,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1536,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,7168,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,8192,0.03200000151991844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,12288,0.04732799902558327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,65536,0.20207999646663666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,10240,0.03929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,16384,0.06095999851822853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,5120,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,4096,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3072,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3584,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2560,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2048,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1536,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,65536,0.118367999792099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,65536,0.1181119978427887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,5120,0.7134720087051392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,4096,0.589631974697113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,7168,0.9711679816246033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,8192,1.0983680486679077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3072,0.4605120122432709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3584,0.5239359736442566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2560,0.3967039883136749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2048,0.3338559865951538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,10240,1.3490879535675049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1536,0.2703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,256,0.1064319983124733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,512,0.1401599943637848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1024,0.20496000349521637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,128,0.08348800241947174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,12288,1.6083199977874756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,5120,0.18275199830532074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,7168,0.24611200392246246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,8192,0.2795200049877167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,10240,0.34140801429748535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,12288,0.4061119854450226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,4096,0.1523520052433014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3584,0.13680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3072,0.11999999731779099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2560,0.10438399761915207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,16384,0.5305280089378357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2048,0.0891840010881424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,16384,2.101599931716919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,512,0.041280001401901245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,256,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1536,0.07254400104284286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1024,0.05798399820923805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,128,0.02768000029027462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,5120,0.1408960074186325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,7168,0.18758399784564972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,8192,0.2096319943666458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,10240,0.2579840123653412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,12288,0.30745598673820496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,4096,0.11539199948310852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3584,0.10454399883747101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3072,0.09276799857616425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2048,0.06787200272083282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2560,0.08012799918651581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,16384,0.3998720049858093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,512,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1536,0.056352000683546066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1024,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,128,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,256,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,7168,0.1860480010509491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,8192,0.20918400585651398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,12288,0.305759996175766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,10240,0.2574079930782318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,16384,0.39907199144363403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,4096,0.11452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3584,0.10393600165843964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3072,0.09212800115346909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,5120,0.1387840062379837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,512,0.03187200054526329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1024,0.04575999826192856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1536,0.05632000043988228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2048,0.06732799857854843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2560,0.07968000322580338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,128,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,256,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,12288,0.20537599921226501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,8192,0.14188799262046814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,10240,0.17340800166130066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,16384,0.2688960134983063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,4096,0.0796160027384758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,7168,0.12614400684833527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,5120,0.09577599912881851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3584,0.07052800059318542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,65536,2.044543981552124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2560,0.054816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2048,0.047040000557899475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3072,0.06371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1536,0.04022400081157684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,512,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1024,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,128,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,256,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,65536,1.5348479747772217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,8192,0.14163200557231903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,10240,0.1738560050725937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,12288,0.20553599298000336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,7168,0.12755200266838074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,16384,0.26848000288009644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,5120,0.09670399874448776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,4096,0.07929600030183792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3584,0.07206399738788605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3072,0.06310400366783142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,65536,1.5343999862670898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2560,0.055296000093221664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1024,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2048,0.047807998955249786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1536,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,512,0.02412799932062626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,256,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,128,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,8192,0.11977600306272507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,7168,0.1080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,10240,0.14860799908638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,12288,0.17417599260807037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,65536,1.025823950767517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,4096,0.06601600348949432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,16384,0.23136000335216522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,5120,0.07980799674987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3072,0.052960000932216644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3584,0.05958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2560,0.04694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1024,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,512,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2048,0.03948799893260002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1536,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,128,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,256,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,8192,0.07379200309515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,7168,0.06684800237417221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,12288,0.10534399747848511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,10240,0.09094399958848953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,16384,0.1372479945421219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3584,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,4096,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,5120,0.0504320003092289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3072,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,65536,1.024832010269165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1024,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2048,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2560,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1536,0.02332800067961216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,512,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,128,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,256,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,12288,0.10627199709415436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,10240,0.0899519994854927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,8192,0.07379200309515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,7168,0.06617599725723267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,16384,0.13686400651931763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3584,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3072,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,4096,0.04307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,65536,0.9365440011024475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,5120,0.050464000552892685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2560,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,256,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2048,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,512,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1024,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1536,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,128,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,8192,0.07420799881219864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,65536,0.5155199766159058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,12288,0.10492800176143646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,7168,0.06611199676990509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,10240,0.0894400030374527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,16384,0.13792000710964203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,4096,0.04255999997258186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2560,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3072,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,5120,0.05177599936723709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3584,0.03836800158023834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2048,0.025631999596953392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,256,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1536,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,128,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,512,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1024,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,7168,0.062144000083208084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,8192,0.06950400024652481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,65536,0.5171200037002563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,12288,0.09932799637317657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,10240,0.08390399813652039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,16384,0.13068799674510956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3072,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3584,0.036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,4096,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2560,0.028831999748945236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,5120,0.047200001776218414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1536,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2048,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,512,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,256,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1024,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,128,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,8192,0.053279999643564224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,7168,0.0541439987719059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,12288,0.07628799974918365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,10240,0.06505600363016129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,65536,0.5162559747695923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,16384,0.09782399982213974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,5120,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,4096,0.03311999887228012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3584,0.02848000079393387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2560,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3072,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2048,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1536,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,512,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1024,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,128,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,65536,0.5154560208320618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,7168,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,8192,0.043807998299598694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,12288,0.061344001442193985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,10240,0.05302400141954422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,16384,0.0793600007891655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,5120,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,4096,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3072,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3584,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2560,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2048,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1536,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1024,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,512,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,256,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,65536,0.37091198563575745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,7168,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,8192,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,12288,0.04483199864625931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,10240,0.03872000053524971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,16384,0.05711999908089638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,5120,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3584,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3072,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,4096,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2048,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,512,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,65536,0.32467201352119446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,7168,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,8192,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,12288,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,10240,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,16384,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,5120,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,4096,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3072,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3584,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2560,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1536,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,65536,0.201664000749588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,7168,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,5120,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,8192,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,12288,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,10240,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,16384,0.05657599866390228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3584,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,4096,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1536,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2560,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3072,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2048,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1024,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,65536,0.1173119992017746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,12288,0.043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,8192,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,7168,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,5120,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,10240,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,16384,0.056384000927209854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,4096,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2560,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2048,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1536,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3584,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3072,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,65536,0.11657600104808807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,65536,0.11689600348472595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,4096,0.4445759952068329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,5120,0.5369600057601929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,7168,0.7284479737281799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3584,0.3949440121650696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,8192,0.8198080062866211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3072,0.349727988243103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2560,0.3004480004310608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,512,0.11417599767446518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1536,0.20745599269866943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,256,0.08313599973917007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2048,0.2518720030784607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1024,0.16380800306797028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,10240,1.0100480318069458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,128,0.06659200042486191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,12288,1.2047359943389893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,8192,0.20953600108623505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,7168,0.18873600661754608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,5120,0.13939200341701508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,10240,0.2577599883079529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,4096,0.1157120019197464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,12288,0.3055360019207001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3584,0.10454399883747101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2560,0.07980799674987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1536,0.056832000613212585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3072,0.09193599969148636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2048,0.06800000369548798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,16384,0.4000000059604645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,256,0.02611199952661991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,512,0.03302399814128876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1024,0.045024000108242035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,128,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,16384,1.5808320045471191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,5120,0.13884800672531128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,7168,0.18681600689888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,8192,0.2101760059595108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,10240,0.2571200132369995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,12288,0.304064005613327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,4096,0.11542399972677231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3072,0.09139200299978256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3584,0.1034879982471466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,16384,0.39824000000953674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2560,0.07929600030183792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2048,0.06771200150251389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1536,0.056352000683546066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1024,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,128,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,512,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,256,0.025919999927282333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,12288,0.20502400398254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,7168,0.12665599584579468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,8192,0.14233599603176117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,10240,0.173567995429039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,5120,0.09529600292444229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,4096,0.07971200346946716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3584,0.07145600020885468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3072,0.06364800035953522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2560,0.05536000058054924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,16384,0.2680000066757202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,512,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1024,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,256,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2048,0.047231998294591904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1536,0.04032000154256821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,128,0.017343999817967415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,10240,0.17334400117397308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,8192,0.14179199934005737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,12288,0.2051839977502823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,16384,0.2680639922618866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,4096,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,5120,0.09494400024414062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,7168,0.12703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3584,0.07145600020885468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2560,0.05488000065088272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3072,0.06265600025653839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,65536,1.5389440059661865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2048,0.04726399853825569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1536,0.04131200164556503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1024,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,512,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,256,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,128,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,65536,1.5343680381774902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,12288,0.20508800446987152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,7168,0.1257600039243698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,8192,0.14195199310779572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,10240,0.17388799786567688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,5120,0.09532800316810608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,16384,0.2687680125236511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,4096,0.07833600044250488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3584,0.07135999947786331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3072,0.06275200098752975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,65536,1.025056004524231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2560,0.05500800162553787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2048,0.047968000173568726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1536,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1024,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,256,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,128,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,512,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,7168,0.08956799656152725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,8192,0.09929600358009338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,12288,0.14399999380111694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,10240,0.1223360002040863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,65536,1.025056004524231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,16384,0.18911999464035034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3584,0.05023999884724617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,4096,0.05484800040721893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3072,0.04467200115323067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,5120,0.06646399945020676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2560,0.04460800066590309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2048,0.037728000432252884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,512,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,128,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,256,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1536,0.03209599852561951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1024,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,10240,0.0896959975361824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,7168,0.0663359984755516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,8192,0.07379200309515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,12288,0.10534399747848511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,16384,0.1369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,5120,0.04992000013589859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3072,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3584,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,4096,0.04262400045990944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2560,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,65536,1.0250240564346313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1536,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2048,0.027264000847935677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,512,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1024,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,256,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,128,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,8192,0.0740479975938797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,12288,0.10473600029945374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,7168,0.06623999774456024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,10240,0.08975999802350998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,65536,0.9136639833450317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,16384,0.1366720050573349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,5120,0.04992000013589859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3584,0.03932800143957138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,4096,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3072,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2560,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1536,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2048,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,512,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,256,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1024,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,128,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,12288,0.10499200224876404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,65536,0.5152639746665955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,8192,0.07417599856853485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,7168,0.06592000275850296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,10240,0.08934400230646133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,4096,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,5120,0.051263999193906784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,16384,0.1371839940547943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3072,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3584,0.03872000053524971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2560,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2048,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,512,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1024,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1536,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,256,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,128,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,8192,0.05753599852323532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,10240,0.069023996591568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,7168,0.05104000121355057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,12288,0.08249600231647491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,65536,0.5154560208320618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,5120,0.04601600021123886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,16384,0.10796800255775452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,4096,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2560,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3584,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3072,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1536,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2048,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1024,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,512,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,128,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,256,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,65536,0.5151680111885071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,7168,0.053279999643564224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,12288,0.0759039968252182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,8192,0.05321599915623665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,10240,0.06480000168085098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,16384,0.09824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2560,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,5120,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,4096,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3584,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3072,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2048,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1536,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1024,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,512,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,256,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,65536,0.5151039958000183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,12288,0.06063999980688095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,7168,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,8192,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,10240,0.052000001072883606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,16384,0.07791999727487564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,5120,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,4096,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3584,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3072,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2560,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2048,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1536,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1024,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,512,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,256,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,128,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,65536,0.3645760118961334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,12288,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,8192,0.03248000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,7168,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,10240,0.038656000047922134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,16384,0.057023998349905014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,5120,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,4096,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3072,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1536,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,256,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,65536,0.29340800642967224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,12288,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,16384,0.03516799956560135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,8192,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,7168,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,10240,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,5120,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,4096,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3584,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2048,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2560,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3072,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1536,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,65536,0.20163199305534363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,12288,0.043168000876903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,8192,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,10240,0.03747199848294258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,7168,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,5120,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,16384,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3584,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,4096,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3072,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2048,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2560,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1536,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,512,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1024,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,65536,0.11680000275373459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,12288,0.04335999861359596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,7168,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,8192,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,10240,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,16384,0.03500799834728241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,5120,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2560,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,4096,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3072,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3584,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2048,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1536,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1024,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,65536,0.11648000031709671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,65536,0.11686400324106216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,5120,0.36102399230003357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,4096,0.2972800135612488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,7168,0.49139198660850525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,8192,0.54995197057724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3072,0.2349119931459427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2560,0.2043839991092682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3584,0.26607999205589294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2048,0.1737920045852661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,10240,0.6765760183334351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1536,0.14112000167369843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,256,0.05894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,512,0.07785599678754807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1024,0.1085439994931221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,128,0.04825599864125252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,12288,0.8052800297737122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,5120,0.09574399888515472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,7168,0.12691199779510498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,8192,0.14220799505710602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,10240,0.17347200214862823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,12288,0.20710399746894836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,16384,1.0592960119247437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3072,0.06351999938488007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3584,0.07145600020885468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,4096,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,16384,0.27027198672294617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2048,0.048767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2560,0.05548800155520439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,512,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1536,0.04089599847793579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1024,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,128,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,256,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,5120,0.09340800344944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,7168,0.12620800733566284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,8192,0.14166399836540222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,12288,0.20524799823760986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,10240,0.174112007021904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,4096,0.07664000242948532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3072,0.06124800071120262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3584,0.07046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,16384,0.26764801144599915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2560,0.05299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2048,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,512,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1024,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,128,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,256,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1536,0.0387520007789135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,7168,0.10819199681282043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,8192,0.12310399860143661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,12288,0.17641599476337433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,10240,0.14819200336933136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,16384,0.2356480062007904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3584,0.06163199990987778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3072,0.0525440014898777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,4096,0.06524799764156342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,5120,0.07919999957084656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2560,0.0459199994802475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2048,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1536,0.03641600161790848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,512,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1024,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,256,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,128,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,65536,1.0261119604110718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,12288,0.10592000186443329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,8192,0.07407999783754349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,10240,0.0907519981265068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,16384,0.13753600418567657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,7168,0.06659200042486191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,4096,0.04291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,5120,0.050783999264240265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3584,0.039103999733924866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,65536,1.0284479856491089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3072,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2560,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2048,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1024,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1536,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,128,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,512,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,256,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,12288,0.10559999942779541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,7168,0.06604799628257751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,8192,0.07395199686288834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,65536,0.9390079975128174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,10240,0.0896959975361824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,16384,0.1371839940547943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3072,0.035392001271247864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,5120,0.049984000623226166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2560,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3584,0.03859199956059456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2048,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,4096,0.04259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,512,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,128,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1536,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,256,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1024,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,65536,0.5155519843101501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,7168,0.061792001128196716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,12288,0.09910400211811066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,8192,0.07004799693822861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,10240,0.0841279998421669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,5120,0.047200001776218414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,16384,0.13436800241470337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3584,0.0363520011305809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,4096,0.0395519994199276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2560,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3072,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2048,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1536,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1024,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,512,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,256,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,128,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,12288,0.07631999999284744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,65536,0.5157439708709717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,7168,0.0488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,8192,0.05375999957323074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,10240,0.06518399715423584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,16384,0.09942399710416794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,4096,0.0350399985909462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,5120,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3584,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2048,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3072,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2560,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1536,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1024,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,256,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,512,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,65536,0.5150719881057739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,12288,0.07622399926185608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,7168,0.048287998884916306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,8192,0.053279999643564224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,10240,0.0650240033864975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,5120,0.04108799993991852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,16384,0.09788800030946732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,4096,0.033695999532938004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2560,0.024064000695943832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3584,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1536,0.019392000511288643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3072,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2048,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1024,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,512,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,128,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,65536,0.3874559998512268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,256,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,12288,0.06070400029420853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,5120,0.030848000198602676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,7168,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,8192,0.044576000422239304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,10240,0.053568001836538315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,16384,0.08166400343179703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3072,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2048,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2560,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3584,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,4096,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1536,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,256,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,512,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1024,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,65536,0.37356799840927124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,8192,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,12288,0.060095999389886856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,10240,0.05206400156021118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,7168,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,5120,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,16384,0.07823999971151352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,4096,0.0261439997702837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3584,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3072,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2560,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2048,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1536,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,512,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1024,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,128,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,65536,0.31228798627853394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,12288,0.04476799815893173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,8192,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,10240,0.038495998829603195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,7168,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,5120,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,16384,0.056992001831531525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2560,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1536,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2048,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,512,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1024,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,65536,0.3011839985847473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,12288,0.044544000178575516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,8192,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,10240,0.04016000032424927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,5120,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,7168,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,16384,0.05686400085687637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,4096,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3584,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3072,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1536,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,65536,0.20150400698184967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,8192,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,7168,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,12288,0.028672000393271446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,10240,0.025760000571608543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,5120,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,16384,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2560,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,4096,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3072,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3584,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2048,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1536,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,65536,0.2019519954919815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,12288,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,10240,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,7168,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,8192,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,16384,0.056352000683546066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,5120,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2048,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3584,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,4096,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2560,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1536,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3072,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,256,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,65536,0.11740799993276596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1024,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,7168,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,12288,0.043007999658584595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,5120,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,8192,0.03171199932694435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,10240,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,16384,0.05555199831724167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2560,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,4096,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3584,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3072,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2048,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1536,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,512,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,65536,0.11667200177907944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,12288,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,7168,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,5120,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,8192,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,10240,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,16384,0.05548800155520439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3584,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,4096,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2048,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3072,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2560,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1536,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1024,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,65536,0.11628799885511398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,5120,0.2728959918022156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,65536,0.11638399958610535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,4096,0.22511999309062958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3584,0.266400009393692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,7168,0.3687039911746979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3072,0.23375999927520752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,8192,0.4134080111980438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2560,0.20236800611019135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1536,0.13811199367046356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,512,0.07548800110816956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,256,0.046720001846551895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2048,0.17078399658203125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,10240,0.5138880014419556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1024,0.08416000008583069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,128,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,12288,0.604095995426178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,8192,0.14185599982738495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,7168,0.1265919953584671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,10240,0.1748799979686737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,5120,0.09494400024414062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,12288,0.20524799823760986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,16384,0.794592022895813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,4096,0.07980799674987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2048,0.04806400090456009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,16384,0.26790401339530945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3584,0.07196799665689468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2560,0.056223999708890915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3072,0.06419199705123901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,128,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,256,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,512,0.024607999250292778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1536,0.04044799879193306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1024,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,8192,0.14102399349212646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,5120,0.0799039974808693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,7168,0.1258240044116974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,12288,0.20502400398254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,10240,0.17427200078964233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,16384,0.2682879865169525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,4096,0.06598400324583054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3584,0.062272001057863235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3072,0.05350400134921074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2560,0.04630399867892265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1536,0.036768000572919846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1024,0.027583999559283257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2048,0.039423998445272446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,512,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,128,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,256,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,65536,1.0254080295562744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,8192,0.11795199662446976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,7168,0.09247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,12288,0.15139199793338776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,10240,0.12531200051307678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,16384,0.22764800488948822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,5120,0.0671359971165657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,4096,0.055135998874902725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3072,0.04572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3584,0.05363199859857559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2560,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2048,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,256,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,128,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1536,0.03324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1024,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,512,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,65536,1.0257279872894287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,7168,0.06646399945020676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,8192,0.0737600028514862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,12288,0.10569600015878677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,10240,0.08988799899816513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,5120,0.05071999877691269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,16384,0.1377280056476593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3584,0.03932800143957138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,4096,0.04224000126123428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3072,0.037151999771595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2560,0.03299200162291527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2048,0.028255999088287354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1536,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1024,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,512,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,256,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,128,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,7168,0.06668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,8192,0.0735040009021759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,12288,0.1053759977221489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,10240,0.0899519994854927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,16384,0.1372479945421219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,65536,0.8714240193367004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,4096,0.04278400167822838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,5120,0.05023999884724617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3584,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3072,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2048,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2560,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1536,0.024224000051617622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,128,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,512,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,256,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1024,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,12288,0.08476799726486206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,7168,0.05350400134921074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,8192,0.05993599817156792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,10240,0.07199999690055847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,65536,0.5161920189857483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,16384,0.11132799834012985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,5120,0.041152000427246094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,4096,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3584,0.03379200026392937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1536,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2560,0.028704000636935234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2048,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3072,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,256,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,512,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1024,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,128,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,65536,0.5157439708709717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,12288,0.07583999633789062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,16384,0.0979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,7168,0.04848000034689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,8192,0.05344000086188316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,10240,0.06534399837255478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,5120,0.04095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,4096,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3584,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1024,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2560,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3072,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1536,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2048,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,512,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,256,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,128,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,65536,0.5154880285263062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,10240,0.06496000289916992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,12288,0.07577600330114365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,8192,0.053568001836538315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,16384,0.09747199714183807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,7168,0.04800000041723251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,5120,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,4096,0.03417599946260452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3584,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3072,0.028672000393271446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2560,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2048,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1536,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,256,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1024,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,512,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,65536,0.365119993686676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,12288,0.06060799956321716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,5120,0.03046399913728237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,7168,0.03859199956059456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,10240,0.05129599943757057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,8192,0.04358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,4096,0.026496000587940216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3584,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,16384,0.07788799703121185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2560,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2048,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1536,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3072,0.022752000018954277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1024,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,512,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,65536,0.36422398686408997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,8192,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,16384,0.058079998940229416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,12288,0.050655998289585114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,65536,0.2919999957084656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,7168,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,10240,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,5120,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,4096,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3072,0.019039999693632126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3584,0.020160000771284103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1536,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2048,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1024,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2560,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,256,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,7168,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,12288,0.044576000422239304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,8192,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,10240,0.038495998829603195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,65536,0.2075520008802414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,5120,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,16384,0.05628800019621849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3584,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,4096,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1536,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,512,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,12288,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,10240,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,8192,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,7168,0.029983999207615852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,5120,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,16384,0.0568000003695488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,65536,0.20163199305534363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3072,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2560,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1536,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2048,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,256,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,12288,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,8192,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,7168,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,10240,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,5120,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,16384,0.043007999658584595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3584,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3072,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,4096,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1536,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2560,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2048,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,512,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,65536,0.20227199792861938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,12288,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,8192,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,10240,0.03743999823927879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,7168,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,16384,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,5120,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,4096,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3584,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1536,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2048,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2560,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,512,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,65536,0.11708799749612808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1024,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,12288,0.043455999344587326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,8192,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,5120,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,65536,0.1170239970088005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,16384,0.055135998874902725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,7168,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,10240,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,4096,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2560,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1536,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1024,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,512,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,12288,0.04355200007557869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,8192,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,5120,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,10240,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,7168,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,65536,0.11654400080442429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,16384,0.05558399856090546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,4096,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3584,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2048,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2560,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3072,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1024,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,512,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,4096,0.1528960019350052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,65536,0.1162559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,5120,0.1847359985113144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,7168,0.25071999430656433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,8192,0.2810559868812561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3584,0.13769599795341492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3072,0.12214399874210358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,10240,0.3438720107078552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2560,0.106175996363163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2048,0.09065599739551544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,12288,0.41071999073028564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1536,0.07523199915885925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,512,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,128,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,256,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1024,0.05862399935722351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,5120,0.05049600079655647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,16384,0.5349119901657104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,7168,0.06668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,8192,0.07430399954319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,12288,0.10681600123643875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,10240,0.09120000153779984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,4096,0.04374400153756142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,16384,0.13862399756908417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3584,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2560,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3072,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1536,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2048,0.028031999245285988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1024,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,512,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,256,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,128,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,8192,0.07385600358247757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,12288,0.1069440022110939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,5120,0.05056000128388405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,10240,0.09001599997282028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,7168,0.0663679987192154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,16384,0.13673600554466248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2560,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,4096,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3584,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3072,0.03558399900794029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2048,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1536,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,512,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1024,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,256,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,128,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,12288,0.09884800016880035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,7168,0.06217600032687187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,8192,0.07017599791288376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,10240,0.08454400300979614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,16384,0.13100799918174744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3072,0.03385600075125694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,5120,0.04755200073122978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3584,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,4096,0.04073600098490715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,65536,0.517952024936676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2560,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2048,0.028960000723600388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1536,0.022816000506281853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,512,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1024,0.018239999189972878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,256,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,128,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,8192,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,65536,0.5173760056495667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,12288,0.07955200225114822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,10240,0.06851200014352798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,7168,0.053408000618219376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,16384,0.11151999980211258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,5120,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,4096,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3584,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3072,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2560,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2048,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1536,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1024,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,512,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,128,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,256,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,65536,0.5160959959030151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,7168,0.0517439991235733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,8192,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,12288,0.07648000121116638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,5120,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,10240,0.06579200178384781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,16384,0.1016319990158081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,4096,0.03167999908328056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1536,0.020191999152302742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3584,0.03359999880194664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3072,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2560,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2048,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,512,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,128,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,256,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1024,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,65536,0.41734400391578674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,12288,0.06143999844789505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,5120,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,7168,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,16384,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,8192,0.04355200007557869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,10240,0.052928000688552856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,4096,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3584,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3072,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2048,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2560,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1536,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1024,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,256,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,65536,0.4113920032978058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,12288,0.044704001396894455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,5120,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,10240,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,7168,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,8192,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,16384,0.0578560009598732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,4096,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3072,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3584,0.020096000283956528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2560,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2048,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1536,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,512,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,65536,0.32396799325942993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,12288,0.04524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,8192,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,10240,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,5120,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,7168,0.03014400042593479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,16384,0.05692800134420395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,4096,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3584,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3072,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2560,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1536,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1024,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,65536,0.23020799458026886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,512,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,12288,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,8192,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,7168,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,5120,0.024159999564290047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,10240,0.038495998829603195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,16384,0.05676800012588501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,4096,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3584,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3072,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2560,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2048,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,65536,0.20556800067424774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1536,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,12288,0.04476799815893173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,7168,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,5120,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,10240,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,8192,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,16384,0.05676800012588501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,4096,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2560,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3584,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3072,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2048,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1536,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1024,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,65536,0.20284800231456757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,512,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,12288,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,7168,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,8192,0.021856000646948814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,10240,0.025855999439954758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,5120,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,16384,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,4096,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3072,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3584,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2560,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,65536,0.201664000749588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1536,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1024,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,256,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,7168,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,8192,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,12288,0.02816000021994114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,10240,0.037567999213933945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,5120,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,16384,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3584,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,4096,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2560,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3072,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2048,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1536,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,65536,0.1189119964838028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,12288,0.0432640016078949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,8192,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,10240,0.03766399994492531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,5120,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,7168,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,16384,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,4096,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3584,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3072,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1536,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2560,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2048,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1024,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,65536,0.11804799735546112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,256,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,12288,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,5120,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,7168,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,10240,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,8192,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,16384,0.034432001411914825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,4096,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3584,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2560,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3072,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2048,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1536,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,65536,0.11689600348472595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1024,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,12288,0.04355200007557869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,5120,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,8192,0.03251200169324875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,7168,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,10240,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,16384,0.05769599974155426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2560,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3584,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3072,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2048,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,4096,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,65536,0.11654400080442429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1536,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,12288,0.04531199857592583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,5120,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,7168,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,8192,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,10240,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,16384,0.05728000029921532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3584,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3072,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2560,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,4096,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2048,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,65536,0.11628799885511398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1536,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,5120,0.1833599954843521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,65536,0.21187199652194977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,7168,0.24716800451278687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,4096,0.15241600573062897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3072,0.12019199877977371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2560,0.10515200346708298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3584,0.1380160003900528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,8192,0.28009599447250366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2048,0.09100800007581711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1536,0.07452800124883652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1024,0.05833600088953972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,10240,0.3404799997806549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,512,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,256,0.027807999402284622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,128,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,12288,0.40406399965286255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,12288,0.10700800269842148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,7168,0.06761600077152252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,8192,0.07622399926185608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,10240,0.08988799899816513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,5120,0.0514880008995533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,16384,0.5320960283279419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,4096,0.04339199885725975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,16384,0.1369280070066452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2560,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3072,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3584,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2048,0.02876799926161766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1536,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,256,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,512,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,128,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1024,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,5120,0.050464000552892685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,7168,0.06694400310516357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,12288,0.10620799660682678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,8192,0.07497599720954895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,10240,0.09071999788284302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,16384,0.1375039964914322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2560,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3584,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,4096,0.04339199885725975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3072,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2048,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,128,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,512,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1024,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,256,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1536,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,8192,0.06335999816656113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,7168,0.06217600032687187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,12288,0.0878399983048439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,10240,0.07427199929952621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,65536,0.5179200172424316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,16384,0.11462400108575821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,5120,0.046720001846551895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3072,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3584,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,4096,0.03984000161290169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2560,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2048,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,512,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1024,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1536,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,65536,0.5186560153961182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,128,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,256,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,10240,0.06537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,12288,0.07606399804353714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,5120,0.03680000081658363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,8192,0.054016001522541046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,7168,0.05011200159788132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,16384,0.11097600311040878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,4096,0.03190400078892708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3584,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2560,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3072,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2048,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1536,0.019807999953627586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,512,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1024,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,256,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,128,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,65536,0.5181120038032532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,8192,0.0541439987719059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,12288,0.07561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,7168,0.04915200173854828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,10240,0.06511999666690826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,16384,0.10409600287675858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,5120,0.03651199862360954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,4096,0.03167999908328056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3584,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1536,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2560,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3072,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2048,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,512,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1024,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,256,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,128,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,65536,0.39577600359916687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,12288,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,7168,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,8192,0.03558399900794029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,4096,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,16384,0.058559998869895935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,10240,0.039712000638246536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,5120,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3584,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2560,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2048,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1024,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1536,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,128,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,512,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,65536,0.37296000123023987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,12288,0.0451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,65536,0.21459199488162994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,8192,0.033535998314619064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,16384,0.05689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,7168,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,10240,0.038975998759269714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,5120,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,4096,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1536,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3584,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2560,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2048,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1024,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,256,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,512,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,12288,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,8192,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,5120,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,7168,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,16384,0.057151999324560165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,10240,0.038816001266241074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,4096,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3584,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2048,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2560,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1536,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,65536,0.20361599326133728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,256,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,12288,0.04419200122356415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,5120,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,7168,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,8192,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,10240,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,16384,0.05689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,4096,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3584,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2048,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2560,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,65536,0.20390400290489197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1536,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,512,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,5120,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,10240,0.026784000918269157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,7168,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,8192,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,12288,0.03126399964094162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,16384,0.038943998515605927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2560,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3584,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3072,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,4096,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2048,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1536,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,65536,0.2035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,12288,0.028224000707268715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,5120,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,8192,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,10240,0.025087999179959297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,7168,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,16384,0.03590400144457817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,4096,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3584,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3072,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2560,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2048,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1024,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1536,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,65536,0.1300159990787506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,12288,0.04371200129389763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,5120,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,7168,0.030688000842928886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,8192,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,10240,0.038816001266241074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,16384,0.05740800127387047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,4096,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2560,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3072,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3584,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1536,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2048,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,65536,0.11955200135707855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,12288,0.045504000037908554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,8192,0.022784000262618065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,7168,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,10240,0.03776000067591667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,5120,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,16384,0.056992001831531525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,4096,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3584,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3072,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2048,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1536,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2560,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,65536,0.11856000125408173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,10240,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,12288,0.044415999203920364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,7168,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,8192,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,5120,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,16384,0.057312000542879105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,4096,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3584,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3072,0.012992000207304955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,65536,0.11718399822711945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2048,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2560,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1536,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1024,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,65536,0.11715199798345566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,12288,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,5120,0.022336000576615334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,7168,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,8192,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,16384,0.0578560009598732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,10240,0.025119999423623085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,4096,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3072,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2560,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1024,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3584,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1536,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,256,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,12288,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,10240,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,5120,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,7168,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,8192,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,16384,0.058240000158548355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3072,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2560,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,4096,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3584,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2048,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,65536,0.11763200163841248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,512,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1024,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,4096,0.15199999511241913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,65536,0.21187199652194977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,5120,0.18361599743366241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,7168,0.24764800071716309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,16384,0.5284799933433533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3072,0.11984000355005264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3584,0.13619199395179749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,8192,0.2776640057563782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,10240,0.3420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,256,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2560,0.10524799674749374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1024,0.05718399956822395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,512,0.041439998894929886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1536,0.07360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2048,0.09017600119113922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,12288,0.404992014169693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,128,0.027168000116944313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,5120,0.051072001457214355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,7168,0.06752000004053116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,8192,0.07436800003051758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,4096,0.04243199899792671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,12288,0.10777600109577179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,10240,0.09200000017881393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3072,0.034912001341581345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3584,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2048,0.026688000187277794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1536,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2560,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,16384,0.13699199259281158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1024,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,512,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,128,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,256,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,5120,0.050944000482559204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,8192,0.07353600114583969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,12288,0.10630399733781815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,10240,0.09151999652385712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,7168,0.06729599833488464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,16384,0.13705599308013916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,4096,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3584,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,65536,0.5176960229873657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3072,0.03455999866127968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2048,0.02566399984061718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2560,0.03017600066959858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1536,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,512,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1024,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,256,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,128,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,12288,0.09731200337409973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,10240,0.08531200140714645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,8192,0.06627199798822403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,7168,0.06070400029420853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,5120,0.045823998749256134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,4096,0.03932800143957138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3584,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,16384,0.12479999661445618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2560,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3072,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2048,0.024671999737620354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1536,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1024,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,256,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,512,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,12288,0.07667200267314911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,16384,0.10175999999046326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,10240,0.0655359998345375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,8192,0.05273599922657013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,7168,0.048928000032901764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,65536,0.5171840190887451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3584,0.028543999418616295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,4096,0.031136000528931618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,5120,0.03651199862360954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2560,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3072,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2048,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1536,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,512,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1024,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,256,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,16384,0.09795200079679489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,12288,0.07711999863386154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,8192,0.05283199995756149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,65536,0.5155839920043945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,10240,0.06572800129652023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,7168,0.04896000027656555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,5120,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,4096,0.03177599981427193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2560,0.025696000084280968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3584,0.03203200176358223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3072,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2048,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1536,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1024,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,512,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,65536,0.3792960047721863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,7168,0.03094400092959404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,8192,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,12288,0.04835199937224388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,10240,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,5120,0.02687999978661537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,16384,0.05795200169086456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3072,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3584,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,4096,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2560,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2048,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1536,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1024,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,65536,0.3644160032272339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,7168,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,10240,0.03999999910593033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,8192,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,16384,0.05686400085687637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,12288,0.04543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,65536,0.2078399956226349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2048,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3072,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,5120,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,4096,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1024,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,12288,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,8192,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,5120,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,7168,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,10240,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,65536,0.2024960070848465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,16384,0.05663999915122986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3584,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,4096,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2560,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3072,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,512,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1536,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,12288,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,8192,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,7168,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,10240,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,65536,0.20201599597930908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,16384,0.05651199817657471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,5120,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,4096,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2560,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,12288,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,65536,0.2019840031862259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,5120,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,8192,0.032896000891923904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,7168,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,10240,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,4096,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,16384,0.03961599990725517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3584,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3072,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1024,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2048,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2560,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1536,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,128,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,7168,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,12288,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,8192,0.022336000576615334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,10240,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,5120,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,16384,0.03593600168824196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3584,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2560,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,4096,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,65536,0.125791996717453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3072,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,128,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,512,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1024,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,256,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1536,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2048,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,65536,0.1186240017414093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,12288,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,5120,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,7168,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,10240,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,8192,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,16384,0.05875200033187866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,4096,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3584,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3072,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1024,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2048,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2560,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,512,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,256,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,128,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,65536,0.11724799871444702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,12288,0.04639999940991402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,8192,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,7168,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,10240,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,5120,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,16384,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,4096,0.014495999552309513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2560,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3072,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2048,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1536,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3584,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1024,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,256,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,128,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,512,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,12288,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,10240,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,7168,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,5120,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,8192,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,16384,0.056095998734235764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3584,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3072,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,4096,0.01926399953663349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2048,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2560,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1536,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,65536,0.11615999788045883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,128,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1024,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,512,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,256,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,12288,0.04560000076889992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,7168,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,5120,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,8192,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,10240,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,16384,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3584,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,4096,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2560,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2048,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3072,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,65536,0.11631999909877777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1536,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,512,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1024,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,256,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,12288,0.046720001846551895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,8192,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,5120,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,7168,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,10240,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,16384,0.03455999866127968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,4096,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3584,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2560,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,65536,0.1159679964184761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3072,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2048,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1536,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,512,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1024,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,256,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,128,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,65536,0.20662400126457214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,4096,0.08799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,5120,0.10927999764680862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3584,0.08134400099515915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,7168,0.14608000218868256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,8192,0.15654399991035461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,10240,0.20000000298023224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2560,0.06323199719190598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2048,0.05632000043988228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3072,0.07302399724721909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1024,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,12288,0.23148800432682037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1536,0.045152001082897186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,256,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,128,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,512,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,16384,0.2921600043773651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,12288,0.08537600189447403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,10240,0.07180800288915634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,8192,0.06032000109553337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,5120,0.04460800066590309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,7168,0.0551999993622303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,16384,0.11078400164842606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,4096,0.03526400029659271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3584,0.0344959981739521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2560,0.02627200074493885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2048,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3072,0.02860799990594387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1536,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1024,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,256,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,128,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,512,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,12288,0.06400000303983688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,10240,0.05632000043988228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,5120,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,7168,0.04335999861359596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,8192,0.046431999653577805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,16384,0.08563199639320374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3584,0.02691200003027916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2560,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2048,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,4096,0.029055999591946602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3072,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1536,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,256,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,512,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1024,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,128,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,8192,0.04588799923658371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,12288,0.06332799792289734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,7168,0.04217600077390671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,10240,0.05488000065088272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,16384,0.08179199695587158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,65536,0.43696001172065735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,5120,0.033344000577926636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3072,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,4096,0.028192000463604927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3584,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2560,0.021023999899625778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,512,0.011648000217974186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1536,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2048,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1024,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,256,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,128,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,65536,0.3264000117778778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,12288,0.045184001326560974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,10240,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,8192,0.03324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,7168,0.03126399964094162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,16384,0.061216000467538834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,5120,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3584,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,4096,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3072,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2048,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2560,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1024,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1536,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,256,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,128,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,512,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,65536,0.30611199140548706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,8192,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,12288,0.045343998819589615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,5120,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,7168,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,10240,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,16384,0.05843200162053108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,4096,0.021247999742627144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3072,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2560,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3584,0.022272000089287758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2048,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1536,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1024,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,65536,0.22070400416851044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,256,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,512,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,12288,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,5120,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,7168,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,10240,0.038943998515605927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,8192,0.03251200169324875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,16384,0.05718399956822395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,4096,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2560,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3072,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3584,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2048,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,65536,0.20576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1024,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,512,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,12288,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,7168,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,10240,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,8192,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,5120,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,16384,0.037728000432252884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3072,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3584,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,4096,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2560,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2048,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,65536,0.20396800339221954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,512,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,12288,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,7168,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,8192,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,5120,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,10240,0.026079999282956123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,16384,0.0382080003619194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3072,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,4096,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3584,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2048,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2560,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,65536,0.12409599870443344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,512,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,256,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,5120,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,7168,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,8192,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,10240,0.04076800122857094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,12288,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,16384,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,4096,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3584,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2560,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3072,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2048,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,65536,0.12230399996042252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1536,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,12288,0.04793599992990494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,8192,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,5120,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,7168,0.021888000890612602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,10240,0.039744000881910324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,16384,0.03641600161790848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3584,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,4096,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3072,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2048,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2560,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,65536,0.12108799815177917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1536,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1024,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,5120,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,8192,0.02236800082027912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,12288,0.04527999833226204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,7168,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,10240,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,16384,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,4096,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2560,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3584,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2048,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3072,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,65536,0.11945600062608719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1536,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1024,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,5120,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,12288,0.045632001012563705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,7168,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,8192,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,10240,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,16384,0.03526400029659271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,4096,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3072,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2048,0.012608000077307224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2560,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3584,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,65536,0.11884800344705582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1536,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,5120,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,7168,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,8192,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,12288,0.04556800052523613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,10240,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,16384,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,4096,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3584,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3072,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2048,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2560,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,65536,0.11798399686813354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1536,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1024,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,5120,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,12288,0.04569600149989128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,7168,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,8192,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,10240,0.03852799907326698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,16384,0.05788800120353699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3072,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,4096,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,65536,0.11750400066375732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2560,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2048,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3584,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1024,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,12288,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,7168,0.020255999639630318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,5120,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,8192,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,16384,0.057920001447200775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,10240,0.03948799893260002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,4096,0.015072000212967396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3584,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2560,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,65536,0.11817599833011627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3072,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2048,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,512,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1024,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,12288,0.04560000076889992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,8192,0.03206399828195572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,16384,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,10240,0.038176000118255615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,5120,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,7168,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,65536,0.1170559972524643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,4096,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2560,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3584,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2048,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1536,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,256,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,65536,0.1165120005607605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,4096,0.08767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,5120,0.10735999792814255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,7168,0.14156800508499146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,8192,0.15747199952602386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3584,0.0791039988398552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3072,0.0713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2560,0.06239999830722809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,10240,0.19830399751663208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,12288,0.2337920069694519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1536,0.042847998440265656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,256,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,512,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2048,0.05347200110554695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1024,0.03420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,128,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,16384,0.2893120050430298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,5120,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,12288,0.08560000360012054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,7168,0.054336000233888626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,8192,0.06124800071120262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,10240,0.07340800017118454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,4096,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,16384,0.10790400207042694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2560,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3072,0.027712000533938408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2048,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1536,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3584,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1024,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,512,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,256,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,12288,0.06492800265550613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,8192,0.0480320006608963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,7168,0.04134399816393852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,5120,0.03167999908328056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,10240,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,16384,0.08220800012350082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3072,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3584,0.024992000311613083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2048,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,4096,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2560,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1536,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1024,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,512,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,65536,0.44047999382019043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,12288,0.06412799656391144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,5120,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,8192,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,7168,0.04032000154256821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,10240,0.05571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,16384,0.07993599772453308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,65536,0.3067840039730072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3072,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,4096,0.02659199945628643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2048,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3584,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1536,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2560,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,512,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1024,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,12288,0.04646399989724159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,5120,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,10240,0.039903998374938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,8192,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,7168,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,16384,0.056832000613212585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,4096,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3072,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3584,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2560,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2048,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1536,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,256,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,512,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,65536,0.2973119914531708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,7168,0.03097599931061268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,12288,0.046271998435258865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,8192,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,10240,0.040863998234272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,16384,0.057631999254226685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,5120,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3072,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2560,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,512,0.010143999941647053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1024,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,65536,0.21254399418830872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,12288,0.044704001396894455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,8192,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,10240,0.040672000497579575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,5120,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,4096,0.022336000576615334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,7168,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,16384,0.05711999908089638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3072,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2560,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,65536,0.20342400670051575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,12288,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,8192,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,5120,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,16384,0.03680000081658363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,10240,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,7168,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,4096,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2048,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3584,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3072,0.014944000169634819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2560,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1536,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,65536,0.2011519968509674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1024,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,12288,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,8192,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,5120,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,7168,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,10240,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,16384,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,4096,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3584,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2560,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3072,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,65536,0.12070400267839432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2048,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1536,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1024,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,512,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,256,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,128,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,8192,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,7168,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,12288,0.030592000111937523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,10240,0.027871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,5120,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,16384,0.035232000052928925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3584,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2560,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3072,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2048,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1536,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,512,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,65536,0.1188800036907196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,256,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,128,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,12288,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,10240,0.028511999174952507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,7168,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,8192,0.03404799848794937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,5120,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,16384,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,4096,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3072,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3584,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2560,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,65536,0.11865600198507309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2048,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1536,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,256,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1024,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,512,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,128,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,12288,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,7168,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,8192,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,10240,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,5120,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,16384,0.03699199855327606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3584,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,4096,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3072,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2560,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2048,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1536,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,65536,0.11686400324106216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1024,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,256,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,128,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,512,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,12288,0.046720001846551895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,7168,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,8192,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,5120,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,10240,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,16384,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3072,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3584,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,65536,0.11715199798345566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,4096,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2560,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2048,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1536,0.01065600011497736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,512,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,256,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,12288,0.045152001082897186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,7168,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,8192,0.032607998698949814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,16384,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,5120,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,10240,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3584,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2560,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3072,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1536,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,4096,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2048,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,65536,0.1159679964184761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1024,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,256,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,512,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,128,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,65536,0.11635199934244156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,5120,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,16384,0.05753599852323532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,12288,0.04556800052523613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,7168,0.020320000126957893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,8192,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,10240,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,4096,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3584,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2048,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1024,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3072,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1536,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2560,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,512,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,256,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,128,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,12288,0.045823998749256134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,8192,0.020800000056624413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,5120,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,10240,0.039872001856565475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,7168,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,16384,0.05750399827957153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,4096,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3584,0.018848000094294548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3072,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2048,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2560,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,65536,0.11584000289440155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1536,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1024,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,256,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,128,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,512,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,12288,0.04531199857592583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,8192,0.031328000128269196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,10240,0.02486399933695793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,5120,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,7168,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,16384,0.03446400165557861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3584,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,65536,0.2061759978532791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3072,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,4096,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2048,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2560,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1536,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1024,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,256,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,512,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,128,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,5120,0.10608000308275223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,65536,0.11526399850845337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,4096,0.08595199882984161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,10240,0.19964799284934998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,7168,0.1406400054693222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3072,0.07036799937486649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3584,0.07884799689054489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,8192,0.15705600380897522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,16384,0.28803199529647827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2560,0.06099199876189232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,12288,0.2325119972229004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2048,0.05318399891257286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1536,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,256,0.021344000473618507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,512,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,128,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1024,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,7168,0.05459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,5120,0.04227200150489807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,4096,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,8192,0.06006399914622307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,10240,0.0735040009021759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,12288,0.08428800106048584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,16384,0.10704000294208527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1536,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2048,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3072,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3584,0.0307839997112751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2560,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1024,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,128,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,512,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,8192,0.04630399867892265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,5120,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,12288,0.06384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,7168,0.04092799872159958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,10240,0.054496001452207565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,16384,0.08118399977684021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,4096,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3584,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2560,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3072,0.023264000192284584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1536,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2048,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1024,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,256,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,512,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,12288,0.0639680027961731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,7168,0.040511999279260635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,10240,0.05411199852824211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,8192,0.04383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,16384,0.07872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,5120,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,4096,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,65536,0.4196160137653351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3584,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3072,0.021856000646948814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2560,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2048,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,512,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1024,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1536,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,65536,0.2996160089969635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,16384,0.05734400078654289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,12288,0.046592000871896744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,7168,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,8192,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,10240,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,4096,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2560,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1536,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3072,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2048,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1024,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,65536,0.3007360100746155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,512,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,10240,0.04041599854826927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,8192,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,12288,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,5120,0.023615999147295952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,7168,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,4096,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,16384,0.05718399956822395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3072,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2560,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2048,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1536,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1024,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,65536,0.21542400121688843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,512,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,128,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,7168,0.03033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,10240,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,12288,0.045632001012563705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,8192,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,5120,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,65536,0.2035840004682541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,16384,0.056703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3584,0.019711999222636223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3072,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2048,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1024,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1536,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,512,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,12288,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,8192,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,5120,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,7168,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,16384,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,10240,0.02969600073993206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,4096,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3584,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2560,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3072,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2048,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,512,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,128,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,65536,0.20076799392700195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,7168,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,8192,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,12288,0.03049599938094616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,5120,0.01744000054895878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,10240,0.028863999992609024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,16384,0.03651199862360954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2560,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2048,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,4096,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1536,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1024,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,512,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,65536,0.12147200107574463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,128,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,256,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,65536,0.12041600048542023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,10240,0.043136000633239746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,12288,0.03017600066959858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,16384,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,5120,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,7168,0.02409599907696247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,8192,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3584,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3072,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1536,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2048,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2560,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1024,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,4096,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,512,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,256,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,128,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,5120,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,8192,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,7168,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,12288,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,10240,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,16384,0.03551999852061272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3584,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3072,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,4096,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2560,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2048,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,65536,0.11900799721479416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1024,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1536,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,10240,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,12288,0.04707200080156326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,8192,0.03254399821162224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,5120,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,7168,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,16384,0.05987200140953064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3584,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,4096,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3072,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2048,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2560,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1536,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1024,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,256,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,512,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,65536,0.11856000125408173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,12288,0.04524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,8192,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,7168,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,10240,0.04095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,16384,0.03478400036692619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,5120,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,4096,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2560,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3584,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3072,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2048,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1024,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,512,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,256,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,65536,0.11664000153541565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,128,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,65536,0.11664000153541565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,12288,0.04464000090956688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,8192,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,10240,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,5120,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,7168,0.029759999364614487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,16384,0.034912001341581345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3584,0.013472000136971474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,4096,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3072,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2560,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2048,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1024,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1536,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,512,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,256,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,12288,0.04495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,8192,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,7168,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,10240,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,5120,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,16384,0.05663999915122986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,4096,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3584,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3072,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2560,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,65536,0.11648000031709671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2048,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1536,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,512,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1024,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,128,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,12288,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,8192,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,16384,0.05721599981188774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,10240,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,7168,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,5120,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3584,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,4096,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2560,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,65536,0.11606399714946747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3072,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2048,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1536,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1024,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,256,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,512,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,128,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,12288,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,16384,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,5120,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,8192,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,7168,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,10240,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3072,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2560,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,4096,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,65536,0.11548800021409988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1536,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2048,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3584,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1024,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,512,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,128,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,256,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,65536,0.11535999923944473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,5120,0.097120001912117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,4096,0.0799039974808693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,7168,0.12806400656700134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,8192,0.14668799936771393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,10240,0.17455999553203583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3584,0.07184000313282013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,12288,0.20502400398254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3072,0.06419199705123901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2560,0.05718399956822395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2048,0.048608001321554184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1536,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,128,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,512,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,256,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1024,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,16384,0.2722240090370178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,12288,0.05651199817657471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,5120,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,7168,0.03843199834227562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,10240,0.04972799867391586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,8192,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,16384,0.07478400319814682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,4096,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3584,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2048,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1024,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2560,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3072,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1536,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,256,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,128,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,12288,0.04800000041723251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,8192,0.03654399886727333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,5120,0.026655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,10240,0.04297599941492081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,7168,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,16384,0.06047999858856201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3072,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2048,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3584,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2560,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,4096,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1536,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1024,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,512,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,256,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,128,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,12288,0.045951999723911285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,10240,0.039423998445272446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,8192,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,65536,0.28220799565315247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,16384,0.058848001062870026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,7168,0.03161599859595299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,5120,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,4096,0.022143999114632607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2560,0.017503999173641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3584,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3072,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2048,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1536,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,512,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1024,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,256,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,65536,0.2248000055551529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,128,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,12288,0.03574400022625923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,7168,0.026623999699950218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,5120,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,10240,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,8192,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,16384,0.04527999833226204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3584,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,4096,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2560,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3072,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2048,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1536,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,256,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,65536,0.20979200303554535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,128,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,12288,0.03392000123858452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,8192,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,7168,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,5120,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,10240,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,16384,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3584,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,4096,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3072,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2560,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1536,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,65536,0.15094399452209473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,7168,0.02380800060927868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,8192,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,5120,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,12288,0.031231999397277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,10240,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,16384,0.03846399858593941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3584,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,4096,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1536,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3072,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2560,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1024,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,65536,0.14243200421333313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,5120,0.018688000738620758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,7168,0.02316799946129322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,12288,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,10240,0.02844800055027008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,8192,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,16384,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3072,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,4096,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3584,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2560,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2048,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1536,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,65536,0.12915199995040894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1024,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,5120,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,7168,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,12288,0.028991999104619026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,10240,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,8192,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,16384,0.03638400137424469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,4096,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3584,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2560,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2048,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,65536,0.12358400225639343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3072,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1024,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1536,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,12288,0.04739199951291084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,7168,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,8192,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,10240,0.04041599854826927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,5120,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,16384,0.036448001861572266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3584,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,4096,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3072,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2560,0.014527999795973301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2048,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,65536,0.12147200107574463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1536,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,8192,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,7168,0.03152000159025192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,12288,0.04678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,10240,0.026815999299287796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,5120,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,16384,0.03625600039958954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,4096,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3584,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2560,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3072,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,65536,0.12070400267839432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2048,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1536,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1024,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,8192,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,7168,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,10240,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,5120,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,12288,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,4096,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3584,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,16384,0.03519999980926514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3072,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2048,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2560,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,65536,0.11958400160074234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1536,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1024,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,12288,0.0451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,8192,0.022175999358296394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,10240,0.02579200081527233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,7168,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,5120,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,16384,0.05788800120353699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,4096,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3584,0.019392000511288643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,65536,0.11868800222873688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3072,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2560,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2048,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1536,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1024,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,128,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,256,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,12288,0.028063999488949776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,7168,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,10240,0.038816001266241074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,8192,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,5120,0.017311999574303627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,16384,0.0586559996008873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3072,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3584,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,4096,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2560,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,65536,0.11807999759912491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2048,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1536,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1024,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,128,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,12288,0.04508800059556961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,7168,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,5120,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,8192,0.03311999887228012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,16384,0.05734400078654289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,10240,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2560,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3584,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,4096,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3072,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,65536,0.11776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2048,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,512,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1024,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,12288,0.04543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,7168,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,8192,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,5120,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,10240,0.03936000168323517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,16384,0.03497600182890892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,4096,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,65536,0.11680000275373459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3584,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2048,0.012480000033974648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2560,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1536,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1024,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,12288,0.02848000079393387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,8192,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,7168,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,5120,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,10240,0.03868800029158592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,16384,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,65536,0.11708799749612808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,4096,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3584,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3072,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2560,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2048,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1536,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,128,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,65536,0.2104959934949875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,5120,0.09590400010347366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,4096,0.07919999957084656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,12288,0.20217600464820862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,7168,0.12412799894809723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3072,0.06355199962854385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3584,0.07078400254249573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,8192,0.14259199798107147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,10240,0.17395199835300446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1536,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2560,0.05571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2048,0.047200001776218414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1024,0.031168000772595406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,512,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,256,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,128,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,10240,0.05113599821925163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,8192,0.04396799951791763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,16384,0.26736000180244446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,12288,0.05571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,5120,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,7168,0.037696000188589096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,16384,0.0729919970035553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3584,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,4096,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2560,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2048,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1536,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3072,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,512,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,256,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1024,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,12288,0.047968000173568726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,8192,0.03654399886727333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,5120,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,10240,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,16384,0.06006399914622307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,7168,0.03187200054526329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2560,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3584,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3072,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2048,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,4096,0.023072000592947006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1536,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1024,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,512,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,12288,0.045632001012563705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,65536,0.26927998661994934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,10240,0.04047999903559685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,7168,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,8192,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,16384,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,4096,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,5120,0.024800000712275505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3584,0.023135999217629433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2560,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2048,0.015936000272631645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3072,0.01929599978029728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1024,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,512,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1536,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,128,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,256,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,65536,0.21532799303531647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,10240,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,12288,0.0350399985909462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,7168,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,5120,0.020896000787615776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,8192,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,16384,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,4096,0.017696000635623932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3584,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1536,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2560,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1024,0.011807999573647976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,512,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,65536,0.20377600193023682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,12288,0.03363199904561043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,8192,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,10240,0.032416000962257385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,5120,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,7168,0.022911999374628067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,16384,0.041439998894929886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,4096,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3584,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3072,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2560,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2048,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1024,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,256,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,65536,0.14918400347232819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,12288,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,8192,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,7168,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,5120,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,65536,0.14060799777507782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,10240,0.028672000393271446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,16384,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,4096,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3584,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2560,0.01398400031030178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2048,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3072,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1536,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,8192,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,7168,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,12288,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,10240,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,65536,0.12515200674533844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,5120,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,16384,0.03657599911093712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,4096,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3584,0.01532800029963255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2560,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2048,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1536,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1024,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,7168,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,12288,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,10240,0.04179200157523155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,8192,0.02502400055527687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,5120,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,16384,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,4096,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3584,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3072,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2560,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2048,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,65536,0.12038400024175644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1536,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,256,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,512,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,12288,0.046751998364925385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,8192,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,7168,0.030047999694943428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,5120,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,10240,0.02812799997627735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,16384,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,4096,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3584,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2560,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2048,0.013407999649643898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3072,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,65536,0.11945600062608719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,128,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,12288,0.0461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,8192,0.03388800099492073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,7168,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,10240,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,16384,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,5120,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3584,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3072,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2560,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,4096,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1536,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,65536,0.11795199662446976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2048,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,128,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,7168,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,12288,0.04649600014090538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,8192,0.022624000906944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,10240,0.026240000501275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,5120,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,16384,0.034752000123262405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,4096,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3584,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2560,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3072,0.014560000039637089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2048,0.01206399966031313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,65536,0.11683200299739838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1024,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1536,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,128,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,8192,0.022016000002622604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,12288,0.04620800167322159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,7168,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,5120,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,10240,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,16384,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,4096,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3584,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3072,0.013439999893307686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2560,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2048,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,65536,0.11628799885511398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1024,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,256,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1536,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,12288,0.045504000037908554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,8192,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,7168,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,10240,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,5120,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,16384,0.057440001517534256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,4096,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3584,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2560,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1536,0.011487999930977821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2048,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,65536,0.1159679964184761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3072,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,8192,0.03270399942994118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,10240,0.024927999824285507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,7168,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,12288,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,5120,0.01696000061929226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,16384,0.05827200040221214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3584,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,4096,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2560,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,65536,0.11561600118875504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2048,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,512,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1024,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1536,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,128,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,12288,0.04444799944758415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,8192,0.033504001796245575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,7168,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,10240,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,16384,0.0570559985935688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,5120,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,4096,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,65536,0.1143679991364479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3584,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2560,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3072,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1024,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,128,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,512,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,65536,0.11433599889278412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,8192,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,7168,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,12288,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,16384,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,5120,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,10240,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,4096,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3584,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3072,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2560,0.015552000142633915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1024,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2048,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1536,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,4096,0.0751039981842041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,65536,0.11446399986743927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,5120,0.09436800330877304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,7168,0.12355200201272964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,8192,0.13993600010871887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,10240,0.1743679940700531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2560,0.05145600065588951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,12288,0.20032000541687012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3072,0.060447998344898224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3584,0.06755200028419495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2048,0.04291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1536,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,256,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,512,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1024,0.027775999158620834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,128,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,16384,0.2597759962081909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,12288,0.05523199960589409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,8192,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,5120,0.02985600009560585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,10240,0.05071999877691269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,7168,0.0360959991812706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,16384,0.07174400240182877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2048,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3072,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,4096,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1536,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3584,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2560,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1024,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,512,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,256,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,128,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,12288,0.046751998364925385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,10240,0.04153599962592125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,7168,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,8192,0.03500799834728241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,5120,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,16384,0.0575999990105629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2560,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3072,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3584,0.022943999618291855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,4096,0.02287999913096428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2048,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1536,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,256,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1024,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,8192,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,12288,0.045024000108242035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,65536,0.25993600487709045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,10240,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,7168,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,16384,0.05584000051021576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3072,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,4096,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,5120,0.024768000468611717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3584,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2560,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1536,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1024,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2048,0.016256000846624374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,128,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,65536,0.20572799444198608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,12288,0.03494400158524513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,10240,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,8192,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,7168,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,5120,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,16384,0.04460800066590309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,4096,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3584,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1536,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2560,0.017791999503970146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2048,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1024,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,65536,0.19724799692630768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,12288,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,5120,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,10240,0.031647998839616776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,8192,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,7168,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,16384,0.041120000183582306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,4096,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3072,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2560,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1536,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3584,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2048,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,65536,0.14815999567508698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,12288,0.0306560005992651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,10240,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,8192,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,7168,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,5120,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,16384,0.03753599897027016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,4096,0.02115200087428093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3584,0.01990400068461895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3072,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2560,0.016767999157309532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2048,0.013535999692976475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,65536,0.1398400068283081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,512,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,256,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,7168,0.029920000582933426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,8192,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,12288,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,10240,0.02675200067460537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,5120,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,16384,0.036607999354600906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,4096,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3584,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2560,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3072,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2048,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,65536,0.12444800138473511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,256,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1024,0.011935999616980553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1536,0.013632000423967838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,12288,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,8192,0.03407999873161316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,5120,0.024320000782608986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,10240,0.042080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,7168,0.030112000182271004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,16384,0.0350399985909462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,4096,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3584,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2560,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3072,0.01772800087928772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2048,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,65536,0.11907199770212173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,12288,0.046560000628232956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,8192,0.03452799841761589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,10240,0.04073600098490715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,7168,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,5120,0.023871999233961105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,16384,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,4096,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3584,0.019936000928282738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3072,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,65536,0.11776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2048,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1024,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,256,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,512,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,12288,0.0461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,7168,0.030208000913262367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,8192,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,10240,0.04041599854826927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,16384,0.03484800085425377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2560,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3584,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3072,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,4096,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2048,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,65536,0.11606399714946747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,7168,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,12288,0.0461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,8192,0.033695999532938004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,5120,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,10240,0.03993599861860275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,16384,0.057920001447200775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3072,0.017823999747633934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2048,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2560,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,65536,0.11555200070142746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1536,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,512,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,12288,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,7168,0.029952000826597214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,10240,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,8192,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,5120,0.02377600036561489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,16384,0.057631999254226685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3584,0.01894400082528591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2560,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,65536,0.115167997777462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,4096,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2048,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,12288,0.04527999833226204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,7168,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,8192,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,10240,0.039583999663591385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,16384,0.05689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,65536,0.11481600254774094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3072,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2048,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,4096,0.020927999168634415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1536,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,12288,0.044576000422239304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,8192,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,10240,0.038816001266241074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,5120,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,16384,0.05692800134420395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,7168,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,4096,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,65536,0.11430399864912033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3072,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2560,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1536,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2048,0.014879999682307243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1024,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,12288,0.044895999133586884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,8192,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,5120,0.023679999634623528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,10240,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,16384,0.05801599845290184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,7168,0.029503999277949333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,65536,0.11315199732780457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,4096,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2560,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2048,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3072,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1024,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,256,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,512,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,12288,0.02751999907195568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,10240,0.03948799893260002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,8192,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,7168,0.0297279991209507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,5120,0.023711999878287315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,16384,0.05753599852323532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,65536,0.20720000565052032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,4096,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3584,0.018432000651955605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3072,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2560,0.01548799965530634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1536,0.012512000277638435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1024,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,128,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,256,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,65536,0.11324799805879593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,5120,0.0926399976015091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,4096,0.07440000027418137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,7168,0.12345600128173828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,8192,0.13948799669742584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3584,0.06700800359249115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,10240,0.1698240041732788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3072,0.05907199904322624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2560,0.050624001771211624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2048,0.042367998510599136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,12288,0.1966399997472763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1024,0.025599999353289604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1536,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,256,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,512,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,128,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,16384,0.2560960054397583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,8192,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,5120,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,12288,0.05567999929189682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,4096,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,10240,0.049984000623226166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,7168,0.03577600046992302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,16384,0.07142399996519089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3584,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2048,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2560,0.01974399946630001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1536,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3072,0.02175999991595745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1024,0.013183999806642532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,256,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,512,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,5120,0.026048000901937485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,8192,0.03436800092458725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,10240,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,7168,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,12288,0.04569600149989128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,16384,0.05920000001788139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,4096,0.021824000403285027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3584,0.021695999428629875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2560,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2048,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3072,0.021056000143289566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1536,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1024,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,128,0.008960000239312649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,256,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,65536,0.25727999210357666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,12288,0.043487999588251114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,10240,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,7168,0.029600000008940697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,8192,0.03276799991726875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,16384,0.05516799911856651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,5120,0.02393599972128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,4096,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3584,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3072,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2560,0.01820800080895424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1536,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2048,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,512,0.01017600018531084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,65536,0.2040639966726303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,12288,0.03593600168824196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,10240,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,5120,0.021376000717282295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,7168,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,16384,0.044224001467227936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,8192,0.027264000847935677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,4096,0.02284800074994564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2560,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2048,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3072,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1536,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,65536,0.18995200097560883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,512,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1024,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,5120,0.021856000646948814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,10240,0.030751999467611313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,12288,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,8192,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,7168,0.0226879995316267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,16384,0.04198399931192398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2560,0.01500799972563982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,4096,0.02208000048995018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3584,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2048,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3072,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,65536,0.14819200336933136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1536,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,512,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,12288,0.031072000041604042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,8192,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,5120,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,10240,0.029664000496268272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,16384,0.03840000182390213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,7168,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3584,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,4096,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2048,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3072,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1536,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,65536,0.1375039964914322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1024,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,512,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,7168,0.024288000538945198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,8192,0.03481600061058998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,12288,0.028575999662280083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,5120,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,10240,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,16384,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,4096,0.017920000478625298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3584,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2048,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3072,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1536,0.014271999709308147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2560,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,65536,0.12352000176906586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1024,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,256,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,12288,0.028736000880599022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,7168,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,8192,0.03471999987959862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,10240,0.042080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,5120,0.024639999493956566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,16384,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3072,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2560,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,65536,0.11753600090742111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,4096,0.02127999998629093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2048,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1536,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1024,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,7168,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,8192,0.034015998244285583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,5120,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,12288,0.045791998505592346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,10240,0.040832001715898514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,16384,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3584,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,4096,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3072,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2560,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,65536,0.11523199826478958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1536,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,512,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1024,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,128,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,12288,0.04560000076889992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,7168,0.029184000566601753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,8192,0.025887999683618546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,10240,0.030719999223947525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,5120,0.02425600029528141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,16384,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3584,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3072,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2560,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,4096,0.020735999569296837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,65536,0.11462400108575821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2048,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1024,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,512,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,7168,0.029888000339269638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,12288,0.0461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,10240,0.03939199820160866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,5120,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,8192,0.03296000137925148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,16384,0.057151999324560165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,4096,0.020479999482631683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2048,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3072,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3584,0.0197759997099638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2560,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,65536,0.11392000317573547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,128,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,12288,0.044544000178575516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,7168,0.02908799983561039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,8192,0.032575998455286026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,10240,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,16384,0.05820799991488457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,5120,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,4096,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3584,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3072,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2560,0.0163199994713068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,65536,0.11347199976444244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2048,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1024,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,12288,0.04608000069856644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,5120,0.023903999477624893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,7168,0.022048000246286392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,8192,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,16384,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,10240,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,4096,0.019840000197291374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3584,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3072,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2560,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,65536,0.11273600161075592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2048,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1536,0.012640000320971012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,128,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,256,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,512,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,12288,0.04473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,7168,0.02953599952161312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,8192,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,5120,0.023296000435948372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,10240,0.0387520007789135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,16384,0.05750399827957153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,4096,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3584,0.019231999292969704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,65536,0.1125440001487732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3072,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2560,0.015456000342965126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2048,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1536,0.01360000018030405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,512,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,12288,0.04476799815893173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,7168,0.028672000393271446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,8192,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,16384,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,5120,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,10240,0.038784001022577286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,65536,0.1125119999051094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,4096,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2560,0.01583999954164028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3072,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2048,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1536,0.01244799979031086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1024,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,128,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,512,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,256,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,12288,0.032607998698949814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,7168,0.029152000322937965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,16384,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,5120,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,8192,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,10240,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,65536,0.11228799819946289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,4096,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2048,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3072,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1536,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2560,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,512,0.009184000082314014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,128,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,65536,0.20604799687862396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,5120,0.09216000139713287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,4096,0.07347200065851212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,7168,0.1212799996137619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3584,0.06700800359249115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,8192,0.13711999356746674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2560,0.050655998289585114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,10240,0.16918399930000305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3072,0.05724800005555153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,12288,0.19699199497699738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2048,0.04259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1536,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1024,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,128,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,512,0.017216000705957413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,256,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,16384,0.25628799200057983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,8192,0.04227200150489807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,5120,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,12288,0.05478399991989136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,7168,0.03743999823927879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,10240,0.04934399947524071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,16384,0.07174400240182877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3584,0.02319999970495701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,4096,0.02521600015461445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2560,0.020959999412298203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2048,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3072,0.02131200022995472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1536,0.015807999297976494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,512,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,128,0.008927999995648861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1024,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,256,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,12288,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,7168,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,5120,0.025567999109625816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,8192,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,10240,0.041280001401901245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,16384,0.058687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2048,0.016383999958634377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2560,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3584,0.02179200015962124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3072,0.021407999098300934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,4096,0.02271999977529049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1536,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1024,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,512,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,256,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,12288,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,10240,0.03798399865627289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,65536,0.25231999158859253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,8192,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,7168,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,16384,0.054048001766204834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,5120,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3072,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,4096,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3584,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2560,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1536,0.013952000066637993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2048,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1024,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,512,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,65536,0.2017280012369156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,128,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,256,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,12288,0.03532800078392029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,10240,0.0315839983522892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,8192,0.026944000273942947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,5120,0.021183999255299568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,7168,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,16384,0.043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,4096,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3584,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3072,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2560,0.015904000028967857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2048,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1024,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1536,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,256,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,512,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,65536,0.18694399297237396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,8192,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,7168,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,12288,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,5120,0.021983999758958817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,10240,0.030559999868273735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,16384,0.042688000947237015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,4096,0.01897599920630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2048,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3584,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1536,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2560,0.015296000055968761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3072,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1024,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,65536,0.1478080004453659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,512,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,128,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,12288,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,8192,0.02598400041460991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,10240,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,5120,0.02035200037062168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,7168,0.025151999667286873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,16384,0.03798399865627289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,4096,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3072,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3584,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2560,0.01651199907064438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2048,0.015039999969303608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1536,0.012384000234305859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,512,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,256,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,65536,0.13868799805641174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1024,0.011615999974310398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,7168,0.024159999564290047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,12288,0.027936000376939774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,8192,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,10240,0.028896000236272812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,5120,0.019999999552965164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,16384,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3584,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,4096,0.02112000063061714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2560,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3072,0.018015999346971512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,65536,0.12291199713945389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1536,0.013567999936640263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,256,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,128,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,12288,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,7168,0.03081599995493889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,8192,0.033663999289274216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,10240,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,16384,0.035711999982595444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,5120,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,4096,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3584,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3072,0.01836800016462803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,65536,0.11593600362539291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2560,0.016416000202298164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2048,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1536,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1024,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,256,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,8192,0.03372799977660179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,7168,0.030912000685930252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,10240,0.03964800015091896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,12288,0.04681599885225296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,5120,0.024000000208616257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,16384,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3072,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3584,0.019680000841617584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,4096,0.020767999812960625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2560,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2048,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,65536,0.11388800293207169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1024,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,256,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,128,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,12288,0.03385600075125694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,8192,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,7168,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,10240,0.04022400081157684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,5120,0.024032000452280045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,16384,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2560,0.015776000916957855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3072,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3584,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,4096,0.021215999498963356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2048,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,65536,0.1133119985461235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1536,0.012799999676644802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,512,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1024,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,256,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,128,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,12288,0.04553600028157234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,8192,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,7168,0.030079999938607216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,10240,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,5120,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,16384,0.04303999990224838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,65536,0.11273600161075592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3584,0.018464000895619392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3072,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,4096,0.02038400061428547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2560,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2048,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1536,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,256,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1024,0.01056000031530857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,128,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,12288,0.045152001082897186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,8192,0.03292800113558769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,10240,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,7168,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,5120,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,16384,0.057631999254226685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3072,0.01679999940097332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,4096,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,65536,0.11228799819946289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2048,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2560,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1024,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,512,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,128,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,12288,0.04438399896025658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,8192,0.03286400064826012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,10240,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,16384,0.04198399931192398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,5120,0.023744000121951103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,7168,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,65536,0.1120000034570694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,4096,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3072,0.01727999933063984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3584,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2560,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2048,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1024,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1536,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,256,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,512,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,128,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,12288,0.0326399989426136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,5120,0.02364799939095974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,8192,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,7168,0.023104000836610794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,10240,0.039455998688936234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,16384,0.04249599948525429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,4096,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3072,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3584,0.018751999363303185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,65536,0.11184000223875046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2560,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2048,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1536,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,512,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,128,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,12288,0.04435199871659279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,8192,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,7168,0.029120000079274178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,5120,0.018400000408291817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,10240,0.02921600081026554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,16384,0.042047999799251556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,4096,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,65536,0.11126399785280228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3072,0.01414399966597557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3584,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2560,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2048,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1024,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,128,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,256,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,512,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,12288,0.03267199918627739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,5120,0.018303999677300453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,7168,0.02304000034928322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,10240,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,8192,0.02489599958062172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,16384,0.041919998824596405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,65536,0.14287999272346497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2560,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2048,0.013824000023305416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,4096,0.01648000068962574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3072,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3584,0.015104000456631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1536,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,256,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,512,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,128,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1024,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,65536,0.14351999759674072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,4096,0.07654400169849396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,5120,0.09238400310277939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,7168,0.11987199634313583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,8192,0.13391999900341034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3584,0.07110399752855301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,12288,0.19091199338436127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,10240,0.16012799739837646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3072,0.06639999896287918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2048,0.05257600173354149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2560,0.06380800157785416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1536,0.04800000041723251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,512,0.04249599948525429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,256,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1024,0.04588799923658371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,128,0.041760001331567764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,16384,0.25091201066970825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,12288,0.05507199838757515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,4096,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,5120,0.03110400028526783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,7168,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,10240,0.0480320006608963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,8192,0.041439998894929886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,16384,0.06934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3584,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3072,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2560,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2048,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1536,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1024,0.017023999243974686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,256,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,512,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,128,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,8192,0.032735999673604965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,12288,0.04476799815893173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,5120,0.025728000327944756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,7168,0.031488001346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,10240,0.03798399865627289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,16384,0.057312000542879105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3072,0.018912000581622124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,4096,0.020640000700950623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3584,0.019328000023961067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2048,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2560,0.01775999926030636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,512,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1536,0.0144640002399683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1024,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,256,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,128,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,65536,0.2905600070953369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,10240,0.03356799855828285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,12288,0.0382080003619194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,8192,0.029632000252604485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,7168,0.02672000043094158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,16384,0.047680001705884933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,4096,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3072,0.017855999991297722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,5120,0.022592000663280487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3584,0.017472000792622566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2560,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2048,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,512,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1536,0.013311999849975109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1024,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,256,0.012896000407636166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,128,0.012927999719977379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,65536,0.24003200232982635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,12288,0.0331839993596077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,10240,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,8192,0.02473600022494793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,7168,0.02300800010561943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,16384,0.040511999279260635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,5120,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,4096,0.016831999644637108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3584,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3072,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2560,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2048,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1536,0.01158399973064661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1024,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,65536,0.20051200687885284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,512,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,256,0.010847999714314938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,128,0.010975999757647514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,12288,0.031039999797940254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,8192,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,10240,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,5120,0.017952000722289085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,16384,0.03891199827194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,7168,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,4096,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3584,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,65536,0.15964800119400024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3072,0.014303999952971935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2560,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1536,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2048,0.012032000347971916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1024,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,256,0.010208000428974628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,128,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,512,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,12288,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,10240,0.02239999920129776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,5120,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,7168,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,16384,0.029823999851942062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,8192,0.018880000337958336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,4096,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3584,0.01283199992030859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3072,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1536,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2560,0.01196799986064434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2048,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,65536,0.15593600273132324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,512,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1024,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,128,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,12288,0.019360000267624855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,5120,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,8192,0.015744000673294067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,10240,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,7168,0.014368000440299511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,16384,0.025248000398278236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,4096,0.011839999817311764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3584,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3072,0.011359999887645245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2560,0.010495999827980995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2048,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1536,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1024,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,65536,0.10998400300741196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,512,0.0080960001796484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,256,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,12288,0.019168000668287277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,7168,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,10240,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,8192,0.01539199985563755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,5120,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,16384,0.022975999861955643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,4096,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2560,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3584,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3072,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2048,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1024,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1536,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,65536,0.0820159986615181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,512,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,256,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,128,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,12288,0.01849599927663803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,8192,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,7168,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,5120,0.011872000060975552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,10240,0.01603199914097786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,16384,0.021088000386953354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3584,0.010528000071644783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,4096,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3072,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1536,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2560,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,65536,0.07513599842786789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2048,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1024,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,256,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,512,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,128,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,12288,0.01740800030529499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,5120,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,8192,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,10240,0.01484800036996603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,7168,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,16384,0.0208320003002882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,65536,0.07513599842786789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3584,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,4096,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3072,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2560,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2048,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1536,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,512,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,128,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1024,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,256,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,12288,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,10240,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,8192,0.011071999557316303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,7168,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,5120,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,16384,0.014816000126302242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,4096,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3584,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3072,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2560,0.009119999594986439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,65536,0.06969600170850754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1536,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2048,0.0081599997356534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,512,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,256,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,128,0.006688000168651342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1024,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,12288,0.011392000131309032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,8192,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,5120,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,7168,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,16384,0.013151999562978745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,10240,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,65536,0.04281599819660187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,4096,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3584,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1536,0.007040000054985285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2560,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2048,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3072,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1024,0.006624000146985054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,128,0.006432000081986189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,256,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,512,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,12288,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,10240,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,7168,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,8192,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,5120,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,16384,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3584,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,4096,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2560,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,65536,0.0363520011305809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3072,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1536,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2048,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1024,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,256,0.006271999794989824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,128,0.006560000125318766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,512,0.006624000146985054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,12288,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,10240,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,8192,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,5120,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,16384,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,7168,0.008671999908983707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,65536,0.03062400035560131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3584,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,4096,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3072,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1536,0.006560000125318766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2048,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2560,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,512,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,128,0.006111999973654747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1024,0.006304000038653612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,256,0.006271999794989824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,12288,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,8192,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,10240,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,7168,0.008767999708652496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,16384,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,5120,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,65536,0.026847999542951584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,4096,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3584,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3072,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2560,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2048,0.006432000081986189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1024,0.006527999881654978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1536,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,512,0.0066559999249875546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,256,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,128,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,12288,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,5120,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,7168,0.009151999838650227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,10240,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,8192,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,16384,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,65536,0.02707199938595295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,4096,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3584,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3072,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2560,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2048,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1536,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1024,0.006144000217318535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,512,0.006047999951988459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,256,0.006047999951988459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,128,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,65536,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,4096,0.07174400240182877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,5120,0.08799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,7168,0.11744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,8192,0.12972800433635712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3584,0.0655680000782013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2560,0.056671999394893646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3072,0.0644799992442131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,10240,0.1573439985513687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,12288,0.1860159933567047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1536,0.041600000113248825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2048,0.04588799923658371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,512,0.036959998309612274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,256,0.03596799820661545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,128,0.03561599925160408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1024,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,16384,0.246848002076149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,5120,0.02956799976527691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,12288,0.05257600173354149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,10240,0.045152001082897186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,8192,0.03951999917626381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,7168,0.03564799949526787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,16384,0.06719999760389328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,4096,0.023840000852942467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2560,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3072,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2048,0.016448000445961952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3584,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1536,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1024,0.014431999996304512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,128,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,256,0.013504000380635262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,512,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,12288,0.042527999728918076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,7168,0.0289280004799366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,5120,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,10240,0.03670400008559227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,8192,0.03174399957060814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,16384,0.054496001452207565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,4096,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3584,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2560,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2048,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3072,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1536,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,512,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,256,0.012000000104308128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,128,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1024,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,12288,0.03580800071358681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,8192,0.028095999732613564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,10240,0.031936001032590866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,7168,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,16384,0.04560000076889992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,65536,0.24809600412845612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,5120,0.02099199965596199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,4096,0.01788800023496151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3584,0.01692800037562847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3072,0.016287999227643013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2560,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2048,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1536,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1024,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,512,0.01152000017464161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,128,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,256,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,65536,0.194240003824234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,12288,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,8192,0.023391999304294586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,10240,0.026976000517606735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,7168,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,16384,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,5120,0.018783999606966972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,4096,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3072,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2048,0.011680000461637974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2560,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3584,0.014783999882638454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1536,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,512,0.0098879998549819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1024,0.011008000001311302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,256,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,128,0.010080000385642052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,65536,0.16208000481128693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,8192,0.02160000056028366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,12288,0.02844800055027008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,5120,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,7168,0.02022399939596653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,10240,0.024960000067949295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,16384,0.03545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,4096,0.014592000283300877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3584,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1536,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3072,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2560,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2048,0.011103999800980091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1024,0.009664000011980534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,65536,0.13078400492668152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,512,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,256,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,128,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,12288,0.023360000923275948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,5120,0.014112000353634357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,7168,0.01635199971497059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,16384,0.027648000046610832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,8192,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,10240,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,4096,0.012415999546647072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2048,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3072,0.011296000331640244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2560,0.01142400037497282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,65536,0.12355200201272964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3584,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1536,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1024,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,256,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,512,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,128,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,10240,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,12288,0.018719999119639397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,5120,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,7168,0.013856000266969204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,8192,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,16384,0.022112000733613968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,4096,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3072,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2560,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3584,0.010367999784648418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2048,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,65536,0.09094399958848953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1536,0.00825599953532219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1024,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,512,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,256,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,128,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,12288,0.01724799908697605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,8192,0.013919999822974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,7168,0.013088000006973743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,10240,0.015359999611973763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,5120,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,16384,0.02070399932563305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2560,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3584,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3072,0.009855999611318111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,4096,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2048,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,65536,0.06969600170850754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1536,0.008415999822318554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1024,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,256,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,512,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,128,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,7168,0.012543999589979649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,12288,0.016543999314308167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,10240,0.015263999812304974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,8192,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,5120,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,16384,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3072,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,4096,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2560,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2048,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3584,0.009631999768316746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,65536,0.06361600011587143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1536,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1024,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,512,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,256,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,128,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,12288,0.015424000099301338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,10240,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,7168,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,8192,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,5120,0.011168000288307667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,16384,0.018271999433636665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,4096,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3584,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3072,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2560,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1536,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2048,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,65536,0.05875200033187866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,512,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1024,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,256,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,128,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,12288,0.012575999833643436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,5120,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,7168,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,8192,0.01033599954098463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,10240,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,16384,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,4096,0.00848000030964613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3584,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3072,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2048,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2560,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,65536,0.056095998734235764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1536,0.007679999805986881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,128,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,512,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,256,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1024,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,12288,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,7168,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,8192,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,5120,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,10240,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,16384,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,4096,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2048,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3584,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3072,0.007552000228315592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,65536,0.039423998445272446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2560,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1536,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1024,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,512,0.006432000081986189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,256,0.006560000125318766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,128,0.0066559999249875546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,12288,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,5120,0.007968000136315823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,8192,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,7168,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,16384,0.01119999960064888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,10240,0.009344000369310379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,4096,0.0074880002066493034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3072,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3584,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,65536,0.03283200040459633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2560,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2048,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1536,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,512,0.006111999973654747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,256,0.006432000081986189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,128,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1024,0.006335999816656113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,12288,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,5120,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,8192,0.008128000423312187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,7168,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,10240,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,16384,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3072,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,65536,0.027135999873280525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2560,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,4096,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3584,0.007135999854654074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2048,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1536,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,512,0.005919999908655882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,256,0.006175999995321035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1024,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,128,0.006111999973654747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,12288,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,16384,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,7168,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,8192,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,5120,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,10240,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,65536,0.021727999672293663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,4096,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3584,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3072,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2560,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1536,0.006111999973654747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1024,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2048,0.006335999816656113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,256,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,512,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,128,0.006399999838322401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,12288,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,8192,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,5120,0.0072639998979866505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,7168,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,10240,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,16384,0.009696000255644321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,65536,0.02067199908196926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,4096,0.006783999968320131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3584,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3072,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1536,0.006432000081986189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2048,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2560,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,256,0.005791999865323305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1024,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,512,0.006047999951988459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,128,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,65536,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,4096,0.06761600077152252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,5120,0.08348800241947174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,7168,0.11148799955844879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,8192,0.12428800016641617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3072,0.054368000477552414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3584,0.060256000608205795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2560,0.04995200037956238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,10240,0.15296000242233276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,12288,0.1815679967403412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1536,0.033824000507593155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,512,0.02800000086426735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2048,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,256,0.02783999964594841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,128,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1024,0.03129599988460541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,16384,0.24220800399780273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,5120,0.027008000761270523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,12288,0.05270399898290634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,8192,0.03721600025892258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,7168,0.03468799963593483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,10240,0.045632001012563705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,16384,0.0676800012588501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,4096,0.02195199951529503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3584,0.020864000543951988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3072,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2560,0.017983999103307724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1536,0.013887999579310417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1024,0.012128000147640705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,512,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2048,0.015584000386297703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,256,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,128,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,12288,0.04089599847793579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,5120,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,7168,0.0272000003606081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,10240,0.035551998764276505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,8192,0.029791999608278275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,16384,0.05196800082921982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,4096,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3072,0.015968000516295433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3584,0.016992000862956047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2560,0.014976000413298607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2048,0.013344000093638897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1536,0.012160000391304493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1024,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,256,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,512,0.010591999627649784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,128,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,12288,0.035392001271247864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,7168,0.024191999807953835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,8192,0.0261439997702837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,10240,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,65536,0.2481279969215393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,16384,0.04508800059556961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3584,0.015519999898970127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,4096,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3072,0.014751999638974667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,5120,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,256,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2048,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1536,0.011455999687314034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1024,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2560,0.014047999866306782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,512,0.009472000412642956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,128,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,65536,0.1860480010509491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,8192,0.022207999601960182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,12288,0.03001599945127964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,5120,0.016863999888300896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,10240,0.02582399919629097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,7168,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,16384,0.03638400137424469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3584,0.014336000196635723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,4096,0.014399999752640724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2048,0.010912000201642513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3072,0.013120000250637531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2560,0.012319999746978283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1536,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,256,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,512,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1024,0.009216000325977802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,65536,0.1599999964237213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,128,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,5120,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,12288,0.027744000777602196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,8192,0.020287999883294106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,7168,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,10240,0.02396799996495247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,16384,0.03488000109791756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3584,0.013055999763309956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,4096,0.013791999779641628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3072,0.01235199999064207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2560,0.012864000163972378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2048,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1536,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1024,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,512,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,256,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,65536,0.12652799487113953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,12288,0.020416000857949257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,8192,0.01587199978530407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,7168,0.015231999568641186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,10240,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,5120,0.012959999963641167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,16384,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,4096,0.011552000418305397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3584,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2560,0.010111999697983265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3072,0.010688000358641148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1536,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2048,0.009600000455975533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1024,0.00800000037997961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,65536,0.11606399714946747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,512,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,256,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,128,0.008224000222980976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,12288,0.01833599992096424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,7168,0.01375999953597784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,8192,0.014911999925971031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,5120,0.013376000337302685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,16384,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,10240,0.016896000131964684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,4096,0.010623999871313572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3584,0.010879999957978725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,65536,0.08553600311279297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3072,0.009952000342309475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2560,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2048,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1024,0.008511999621987343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1536,0.008063999935984612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,512,0.007872000336647034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,128,0.007584000006318092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,256,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,12288,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,5120,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,8192,0.014015999622642994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,7168,0.013024000450968742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,10240,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,16384,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,4096,0.010432000271975994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3584,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2560,0.009247999638319016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3072,0.009920000098645687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,65536,0.07100799679756165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2048,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1024,0.007424000184983015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1536,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,256,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,128,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,512,0.007455999962985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,12288,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,5120,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,7168,0.011327999643981457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,10240,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,8192,0.01190400030463934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,16384,0.017376000061631203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,4096,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3584,0.008895999751985073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2048,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3072,0.008704000152647495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2560,0.008448000065982342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,65536,0.06639999896287918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1024,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1536,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,512,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,256,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,128,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,12288,0.013279999606311321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,5120,0.00940799992531538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,7168,0.010463999584317207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,8192,0.010944000445306301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,10240,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,16384,0.01600000075995922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,4096,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3584,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3072,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2560,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,65536,0.05167999863624573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1024,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2048,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1536,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,512,0.006783999968320131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,256,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,128,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,12288,0.012095999903976917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,10240,0.011136000044643879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,7168,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,5120,0.008608000352978706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,8192,0.010400000028312206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,16384,0.01408000010997057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2048,0.00723200011998415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3072,0.007519999984651804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2560,0.007615999784320593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1536,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3584,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,4096,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,65536,0.04566400125622749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,256,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,512,0.0066559999249875546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,128,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1024,0.006752000190317631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,12288,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,5120,0.008383999578654766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,8192,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,7168,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,10240,0.009983999654650688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,16384,0.012671999633312225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,4096,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3584,0.0077760000713169575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3072,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2560,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,65536,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2048,0.006912000011652708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1536,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,512,0.006560000125318766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1024,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,256,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,128,0.006271999794989824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,5120,0.007648000027984381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,12288,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,7168,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,8192,0.008736000396311283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,10240,0.009440000168979168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,16384,0.01104000024497509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,4096,0.007104000076651573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3072,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2560,0.006976000033318996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,65536,0.033055998384952545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2048,0.006591999903321266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1536,0.006527999881654978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3584,0.0071680000983178616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1024,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,512,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,256,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,128,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,12288,0.008991999551653862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,8192,0.007935999892652035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,16384,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,5120,0.007712000049650669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,7168,0.007903999648988247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,10240,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,65536,0.02595200017094612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,4096,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3072,0.007007999811321497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3584,0.006816000211983919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1536,0.006271999794989824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2560,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2048,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1024,0.006335999816656113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,256,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,128,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,512,0.006175999995321035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,12288,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,8192,0.008031999692320824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,10240,0.00863999966531992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,7168,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,16384,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,5120,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,65536,0.022304000332951546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,4096,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2560,0.006783999968320131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3072,0.006943999789655209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3584,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2048,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1536,0.006463999859988689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1024,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,512,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,256,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,128,0.006047999951988459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,12288,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,10240,0.008191999979317188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,8192,0.00774399982765317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,5120,0.007199999876320362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,16384,0.009375999681651592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,7168,0.007391999941319227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,65536,0.019967999309301376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2560,0.006560000125318766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3584,0.00687999976798892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,4096,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3072,0.006719999946653843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1024,0.006335999816656113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1536,0.005919999908655882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2048,0.006240000016987324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,512,0.006207999773323536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,256,0.006144000217318535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,128,0.006271999794989824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,65536,0.019872000440955162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,4096,6.027303568522135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,5120,7.658247375488282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,6144,9.006076049804687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,7168,10.389544677734374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,8192,12.141764322916668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,3584,5.291146850585937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,3072,4.634222920735677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,2560,3.8589332580566404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,1536,2.279143524169922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,1024,1.4628917694091796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,2048,2.9855295817057295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,128,0.3940277417500814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,10240,14.91214396158854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,512,0.8262367884318034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,64,0.36095892588297523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,768,1.1586602528889975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,256,0.5125738779703777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,32,0.35553385416666666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,12288,17.788932291666665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,7168,2.5412320454915363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,6144,2.2362464904785155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,8192,2.979777018229167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,10240,3.8147221883138025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,65536,16384,25.56114298502604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,12288,4.422417195638021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,3584,1.2917471567789713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,4096,1.4302346547444662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,5120,1.7590421040852864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,2560,0.885097630818685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,3072,1.0759882609049478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,768,0.29249067306518556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,1024,0.3732490539550781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,2048,0.7132938385009766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,1536,0.5564479827880859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,128,0.09389119942982992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,256,0.12349013487497966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,512,0.21037440299987792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,32,0.08737493356068929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,64,0.08889066378275554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,16384,6.1193384806315105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,6144,1.5207178751627604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,7168,1.8313973744710286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,8192,2.0807103474934894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,10240,2.698250579833984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,12288,3.280523681640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,4096,1.0336352030436198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,3584,0.8928202946980794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,5120,1.319113540649414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,3072,0.7715338389078776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,1024,0.27811307907104493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,1536,0.3973024050394694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,2048,0.5165621439615886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,2560,0.6600800196329752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,16384,4.367893473307292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,512,0.15612692832946778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,768,0.21572267214457191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,128,0.06924266815185547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,256,0.09509332974751791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,32,0.06799786885579427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,64,0.06260053316752115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,7168,1.5690687815348308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,8192,1.8074923197428387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,10240,2.193919881184896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,12288,2.7264363606770834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,5120,1.07609494527181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,6144,1.3443093617757162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,4096,0.8777919769287109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,16384,3.678059641520182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,3584,0.7591605504353841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,3072,0.6560042699178059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,2560,0.5625727971394856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,2048,0.43986028035481767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,1024,0.23216533660888672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,1536,0.33658132553100584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,768,0.1832640012105306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,512,0.135590394337972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,256,0.08517440160115561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,32,0.0568181316057841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,128,0.061273598670959474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,64,0.05630079905192057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,8192,1.4363061269124349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,10240,1.8017962137858075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,12288,2.1799786885579424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,16384,2.9499041239420576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,7168,1.262603759765625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,6144,1.0342624028523764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,5120,0.8745941162109375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,2560,0.4459349314371745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,4096,0.7055018742879231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,3584,0.6172447840372721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,3072,0.5269578615824382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,768,0.1437887986501058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,16384,65536,26.043752034505207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,1024,0.18990507125854492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,1536,0.2721023877461751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,2048,0.35778026580810546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,512,0.1046720027923584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,128,0.04935786724090576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,32,0.04621119896570842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,64,0.04505279858907064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,256,0.06605653365453085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,12288,65536,17.92470906575521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,8192,1.196141815185547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,10240,1.5382208506266275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,12288,1.765778096516927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,6144,0.9013568242390951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,7168,1.055291748046875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,16384,2.4539263407389322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,5120,0.7458826700846355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,4096,0.5987648010253906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,2048,0.3033098538716634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,1536,0.23309226036071778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,2560,0.37708158493041993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,3072,0.4599722544352214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,3584,0.532315731048584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,1024,0.162118403116862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,256,0.05764480034510294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,768,0.1274890661239624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,512,0.09289173285166422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,128,0.04476586580276489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,32,0.04123733441034953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,64,0.04083306789398193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,10240,65536,15.023465983072915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,7168,0.8922250747680665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,8192,1.0281450907389322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,10240,1.3333440144856772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,12288,1.5967540740966797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,5120,0.6369866689046224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,4096,0.5260138511657715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,6144,0.7667456309000651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,16384,2.105293909708659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,3584,0.4590911865234375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,2560,0.33085225423177084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,2048,0.26267199516296386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,1536,0.2005674680074056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,3072,0.3868330637613932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,1024,0.1421994686126709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,768,0.11217280228932698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,512,0.0807477315266927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,64,0.036101333300272626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,32,0.035758932431538895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,128,0.038815999031066896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,256,0.04949546655019124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,8192,65536,12.161798095703125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,7168,0.7716021219889323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,8192,0.8527135848999023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,10240,1.0852938334147135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,12288,1.3503519694010415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,6144,0.6591168085734049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,5120,0.5394026438395183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,4096,0.43380692799886067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,3584,0.3917311986287435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,16384,1.7537802378336589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,2560,0.2779285430908203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,1536,0.1740224043528239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,2048,0.2229322592417399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,1024,0.121560533841451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,3072,0.33801066080729164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,512,0.070470396677653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,128,0.03297706643740336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,64,0.02917226751645406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,256,0.04559253454208374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,768,0.09438186486562093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,32,0.030510934193929036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,7168,65536,10.358545939127605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,8192,0.6886816024780273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,10240,0.8376565297444662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,7168,0.5863775889078776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,12288,1.0177482604980468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,6144,0.5196586608886719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,16384,1.411676788330078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,3584,0.30106986363728844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,5120,0.4258442560831706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,4096,0.34052267074584963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,3072,0.2650826613108317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,1024,0.09410666624704997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,1536,0.13509333928426107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,2560,0.222052272160848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,2048,0.17509973843892415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,768,0.07627092997233073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,256,0.03520853519439697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,512,0.05477759838104248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,128,0.02723840077718099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,64,0.024422399202982583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,32,0.025522132714589436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,6144,65536,8.871983846028645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,8192,0.6044640223185221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,10240,0.737442143758138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,12288,0.9109365463256835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,7168,0.5193429311116536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,6144,0.45682452519734695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,16384,1.1955029805501303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,5120,0.37363519668579104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,4096,0.3091423988342285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,3584,0.2653461297353109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,3072,0.23308053016662597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,1024,0.08344000180562337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,1536,0.12157119909922283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,2560,0.1898314634958903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,2048,0.1542239983876546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,512,0.04899626572926839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,256,0.031546666224797563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,128,0.0250709335009257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,64,0.02335360050201416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,768,0.06734933058420817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,32,0.02357333302497864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,5120,65536,7.466607157389324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,8192,0.5081503868103028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,10240,0.6494645436604818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,7168,0.448685868581136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,12288,0.7588085174560547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,4096,0.2615808010101318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,6144,0.38418025970458985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,5120,0.329963747660319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,16384,1.0021194458007812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,3584,0.22796266873677573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,1024,0.07263253529866537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,2048,0.13591787020365398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,1536,0.1029695987701416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,2560,0.1641749382019043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,3072,0.19764587084452312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,128,0.02113599975903829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,512,0.04310186703999837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,64,0.019694934288660683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,768,0.05759146610895792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,256,0.028540800015131634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,32,0.021610667308171592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,4096,65536,5.841621398925781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,8192,0.42267627716064454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,7168,0.37778453826904296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,10240,0.5236096064249675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,12288,0.6275807698567708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,6144,0.3192469278971354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,4096,0.21596266428629557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,5120,0.27262614568074545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,16384,0.8516981124877929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,3584,0.19032319386800128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,1024,0.06242453257242838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,3072,0.1647605260213216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,2560,0.13950506846110028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,1536,0.08801066875457764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,2048,0.1147157351175944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,512,0.037972267468770346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,256,0.02618559996287028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,64,0.01655359963575999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,128,0.019847466548283895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,768,0.05042239824930826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,32,0.018104533354441323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3584,65536,5.118488566080729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,8192,0.3359882672627767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,7168,0.2958122571309408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,10240,0.41702613830566404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,12288,0.503111457824707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,6144,0.25397866566975913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,5120,0.21384533246358237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,4096,0.1721941312154134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,16384,0.6743477503458659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,3584,0.1524735927581787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,2048,0.08965120315551758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,3072,0.1308085362116496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,1024,0.050360532601674404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,2560,0.11050346692403157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,1536,0.07104213237762451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,512,0.03104426662127177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,256,0.021280000607172646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,64,0.013434666395187377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,768,0.04065706729888916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,128,0.016029866536458333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,32,0.01388800044854482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,3072,65536,4.427635192871094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,8192,0.25270934104919435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,10240,0.31465279261271156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,7168,0.2278421401977539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,12288,0.37540693283081056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,6144,0.1913258711496989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,4096,0.13328213691711427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,5120,0.16107625961303712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,16384,0.5061855951944987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,3584,0.11482026576995849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,3072,0.09937067031860351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,1024,0.03918186823527019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,1536,0.054626135031382236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,2048,0.06908480326334634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,2560,0.08601280053456625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,768,0.031931734085083006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,128,0.013037866353988648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,64,0.011142399907112122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,256,0.017156267166137697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,32,0.01162559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,512,0.024521599213282265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2560,65536,3.695964813232422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,8192,0.16801706949869794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,7168,0.15184106826782226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,10240,0.20941972732543945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,12288,0.2548469384511312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,6144,0.1274517297744751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,4096,0.08686400254567464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,5120,0.10983893076578777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,16384,0.3407007853190104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,3584,0.07797333399454752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,1024,0.025166932741800947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,2560,0.056117331981658934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,1536,0.03601813316345215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,2048,0.046674132347106934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,3072,0.06621013482411703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,128,0.008321066697438557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,256,0.010883200168609618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,512,0.015432533621788026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,64,0.00724480003118515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,768,0.020204800367355346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,32,0.007645866771539052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,2048,65536,2.852869415283203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,8192,0.13513280550638834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,7168,0.11544960339864094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,10240,0.1668831984202067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,12288,0.19529706637064617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,6144,0.10218133131663006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,4096,0.06930027008056641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,5120,0.08334933121999105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,16384,0.25968640645345054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,3584,0.0599231998125712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,1536,0.027747199932734175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,1024,0.020012799898783365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,2048,0.03584213256835937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,3072,0.05313173135121664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,2560,0.044835201899210614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,768,0.016104533274968465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,128,0.006995200117429097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,64,0.00609493354956309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,256,0.008821333448092144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,512,0.012444800138473511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,32,0.006279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1536,65536,2.0635882059733075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,7168,0.07817386786142985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,8192,0.08791999816894532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,10240,0.10873493353525798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,12288,0.1292298634847005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,5120,0.05630079905192057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,6144,0.06734506289164224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,4096,0.04577813148498535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,16384,0.1752405325571696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,3584,0.04011840025583903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,3072,0.03519146839777629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,1024,0.014115200440088908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,2048,0.024318933486938477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,1536,0.018935465812683107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,2560,0.030125866333643597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,768,0.01167039970556895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,512,0.009272533655166625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,64,0.005176533261934916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,32,0.0053941334287325535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,128,0.005830400188763936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,256,0.006967466572920482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,1024,65536,1.362279510498047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,8192,0.0493397315343221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,7168,0.04367680152257283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,10240,0.062990931669871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,12288,0.07332586447397868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,5120,0.032022400697072344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,6144,0.037538135051727296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,4096,0.026758400599161784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,16384,0.09810880025227865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,3584,0.023348265886306764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,1024,0.00840106705824534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,1536,0.010841600100199382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,3072,0.020606933037439983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,2048,0.013356799880663553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,2560,0.016478932897249856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,768,0.007253333429495494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,32,0.004267733295758565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,512,0.006192000210285186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,64,0.0039264000952243805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,128,0.004262400170167288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,256,0.00487253318230311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,768,65536,1.0616085052490234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,8192,0.036848000685373944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,7168,0.03285226623217265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,10240,0.045474131902058915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,12288,0.05313599904378256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,5120,0.025166932741800947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,6144,0.028784000873565675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,16384,0.07037973403930664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,4096,0.021179733673731486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,3584,0.018538665771484376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,1536,0.007098666826883952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,3072,0.016104533274968465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,1024,0.0057429333527882894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,2048,0.008298666775226593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,2560,0.01009493370850881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,128,0.003606399893760681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,256,0.003874133278926214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,768,0.005228800078233084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,512,0.004615466793378194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,64,0.0032405334214369455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,512,65536,0.6976821263631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,32,0.00360959991812706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,7168,0.03258879979451497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,8192,0.037254401048024494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,10240,0.04348586797714234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,12288,0.052849066257476804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,5120,0.024658133586247764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,6144,0.029292800029118854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,4096,0.0200981338818868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,16384,0.07122666835784912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,3584,0.01817173361778259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,3072,0.01591253379980723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,2560,0.0105813334385554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,1024,0.005836800237496694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,1536,0.007469866673151653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,2048,0.008591999610265095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,768,0.005018666883309682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,256,0.003751466671625773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,128,0.0033258666594823206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,64,0.003173333406448364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,512,0.00436160018046697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,256,65536,0.3722720146179199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,32,0.003319466610749563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,7168,0.030849067370096843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,8192,0.034414935111999514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,10240,0.04212586482365926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,12288,0.05385919809341431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,6144,0.02704213261604309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,5120,0.023264000813166298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,4096,0.019387733936309815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,16384,0.06893440087636313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,3584,0.017270400126775106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,3072,0.013796266913414002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,2560,0.009050666292508443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,2048,0.007713066538174947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,1024,0.005108266572157542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,768,0.00470719983180364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,1536,0.006538666784763336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,256,0.0034783999125162757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,64,0.0029781334102153777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,512,0.004099199920892716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,128,0.003101866692304611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,128,65536,0.28257598876953127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,32,0.0029578665892283124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,6144,4.48069101969401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,64,65536,0.27272958755493165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,7168,5.21263682047526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,8192,5.969276936848958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,10240,7.388832092285156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,3584,2.5507606506347655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,4096,2.8936533610026043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,3072,2.117798360188802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,5120,3.7440266927083337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,12288,9.136946614583334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,512,0.42705386479695634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,1536,1.07945925394694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,2048,1.4219231923421225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,1024,0.7321461359659831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,768,0.5816661198933919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8192,32,65536,0.2583999951680501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,64,0.18187840779622394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,128,0.20050346056620277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,2560,1.726598358154297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,256,0.26066880226135253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,32,0.17878293991088867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,65536,16384,11.94138895670573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,6144,1.066492780049642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,7168,1.2404042561848958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,8192,1.4015562693277994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,10240,1.810862986246745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,3584,0.6336405436197917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,4096,0.7041045506795247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,12288,2.224780782063802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,3072,0.5289696057637532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,5120,0.8820725123087565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,1024,0.19065173467000324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,512,0.10677013397216797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,2560,0.4470165252685547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,1536,0.27392533620198567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,768,0.14350825945536297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,256,0.0637503981590271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,2048,0.36721919377644857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,128,0.04982826709747314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,64,0.04583253463109334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,32,0.046639998753865555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,16384,2.969957224527995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,6144,0.7807381312052409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,7168,0.9013952255249024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,8192,1.010322125752767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,10240,1.2677407582600912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,12288,1.5797866821289062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,4096,0.5154015858968098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,3584,0.45067412058512374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,5120,0.655841064453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,3072,0.3908501307169596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,16384,2.126403172810872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,2560,0.33117332458496096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,1024,0.14293012619018555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,1536,0.1999381383260091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,256,0.05008960167566935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,2048,0.2621802647908529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,768,0.11079999605814617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,128,0.03841813405354817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,512,0.081167999903361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,32,0.03625919818878174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,64,0.03567253351211548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,7168,0.7508650461832682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,8192,0.8786495844523111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,10240,1.081606419881185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,12288,1.3013205210367838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,5120,0.5521024068196614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,6144,0.6418432235717774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,4096,0.4348874727884929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,16384,1.7870229085286458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,3584,0.3915295918782552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,2048,0.22379840215047203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,1536,0.1721567948659261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,2560,0.27721707026163733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,1024,0.1220970630645752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,768,0.09474240144093832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,3072,0.3381333351135254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,64,0.030114134152730305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,512,0.07018346786499023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,256,0.04599680105845134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,128,0.03421119848887126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,32,0.03160960078239441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,8192,0.693449592590332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,10240,0.8590047836303711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,12288,1.0165226618448893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,7168,0.6054026921590169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,16384,1.3725407918294272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,6144,0.5211359977722168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,5120,0.42610985438028975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,4096,0.35184106826782224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,3584,0.30383148193359377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,3072,0.26606613794962564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,16384,65536,12.163069661458334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,2560,0.21747520764668785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,2048,0.17974400520324707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,1536,0.13603413899739583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,1024,0.09687999884287515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,768,0.07765226364135742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,256,0.03573866685231526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,512,0.05512319803237915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,128,0.027879466613133747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,64,0.02466986576716105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,32,0.02579306761423747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,12288,65536,8.887322998046875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,8192,0.5998442967732747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,10240,0.7538623809814453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,12288,0.8829034805297852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,7168,0.519054921468099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,16384,1.1661567687988281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,6144,0.45250879923502607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,5120,0.3833738644917806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,3584,0.262989870707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,4096,0.3031914710998535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,2048,0.1580586592356364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,3072,0.2277898629506429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,2560,0.19426773389180502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,10240,65536,7.492727661132813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,1024,0.08445653120676676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,1536,0.11859412988026936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,768,0.06733333269755046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,256,0.03174293239911397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,512,0.04914773305257161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,128,0.026165332396825152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,64,0.023099732398986817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,32,0.024871466557184856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,7168,0.44250879287719724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,8192,0.5100864092508952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,10240,0.6324757258097331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,12288,0.7595189412434895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,8192,65536,5.888906860351563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,6144,0.3809119860331217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,5120,0.32314879099527993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,4096,0.2597194671630859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,16384,1.019271469116211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,3584,0.2274229367574056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,3072,0.1989162604014079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,1024,0.07355413436889649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,2048,0.13420160611470539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,1536,0.10265279610951741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,768,0.05738986730575561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,512,0.04268053372701009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,2560,0.16346240043640137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,128,0.021496532360712688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,256,0.02779200077056885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,64,0.02063039938608805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,32,0.020658133427302043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,7168,0.3744842529296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,8192,0.4284074783325195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,10240,0.5401824315388997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,12288,0.6407530466715495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,16384,0.8396362940470377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,5120,0.2717695871988932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,6144,0.32256641387939455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,4096,0.2224309285481771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,3584,0.193667205174764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,1536,0.08910826841990152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,7168,65536,5.275475056966146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,2048,0.11356053352355958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,3072,0.16571626663208008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,2560,0.140993070602417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,1024,0.06284266710281372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,512,0.03911466598510742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,256,0.026228266954421996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,64,0.016793600718180337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,128,0.019900800784428914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,768,0.05029973189036051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,32,0.0175327996412913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,7168,0.2979082743326823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,8192,0.3426911989847819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,10240,0.41667947769165037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,12288,0.5026368141174317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,6144,0.25666346549987795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,5120,0.2176138718922933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,16384,0.6645973205566407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,4096,0.17207892735799152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,3584,0.15278293291727701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,2048,0.08980053265889486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,3072,0.13257813453674316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,1024,0.050001064936319985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,1536,0.07034666538238525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,2560,0.11264213720957439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,512,0.030842665831247968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,6144,65536,4.539875284830729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,768,0.04047040144602458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,128,0.015974400440851848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,256,0.02113706668217977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,64,0.013540266950925191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,32,0.014339199662208557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,7168,0.2778442700703939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,8192,0.31994880040486656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,10240,0.3926442782084147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,12288,0.4655466715494792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,5120,0.20325120290120444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,4096,0.1620533307393392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,16384,0.6205162684122721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,6144,0.23943467140197755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,5120,65536,3.668000030517578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,3584,0.14211093584696452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,1024,0.04874773422876994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,768,0.03900373379389445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,1536,0.06645866632461547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,2048,0.08500373363494873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,3072,0.1255626678466797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,2560,0.10443413257598877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,512,0.024283732970555624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,128,0.01409173309803009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,256,0.016565333803494772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,64,0.013529599706331889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,32,0.014294399817784628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,7168,0.22763519287109374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,8192,0.2546474615732829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,10240,0.3149930636088053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,12288,0.38089494705200194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,16384,0.5034986813863118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,5120,0.1619690736134847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,4096,0.13048533598581952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,6144,0.19616959889729818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,4096,65536,2.9126187642415364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,3584,0.11637760003407796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,3072,0.10005653699239095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,768,0.032103466987609866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,1024,0.0397546648979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,2560,0.08408532937367758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,1536,0.05420800050099691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,2048,0.06964373588562012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,512,0.024540799856185912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,128,0.013110400239626566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,256,0.017097600301106772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,64,0.01125333309173584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,32,0.011603200435638427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,7168,0.18798720041910807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,8192,0.20979733467102052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,10240,0.2628981272379557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,12288,0.320411745707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,4096,0.1086687962214152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,16384,0.4162005424499512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,5120,0.13327360153198242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,6144,0.1617930730183919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,3584,0.09730772972106934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,1024,0.03195733428001404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,1536,0.04482346773147583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3584,65536,2.687468719482422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,2560,0.0697376012802124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,2048,0.056686933835347494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,3072,0.08290879726409912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,128,0.010070400436719258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,768,0.025700267155965167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,32,0.00901759962240855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,64,0.008640000224113464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,256,0.013386666774749756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,512,0.01930026610692342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,7168,0.14930346806844075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,8192,0.17153280576070148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,10240,0.2070634682973226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,12288,0.24953920046488443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,6144,0.12874773343404133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,16384,0.33779732386271155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,5120,0.10888319810231525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,4096,0.08584106763203939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,3584,0.07632959683736165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,3072,65536,2.1133141835530598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,2560,0.05646506547927856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,2048,0.04615253210067749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,3072,0.06672960122426351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,1536,0.03518826564153035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,1024,0.025509333610534667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,512,0.015593600273132325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,768,0.020814933379491172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,256,0.010961066683133442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,128,0.00832533339659373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,64,0.007163733243942261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,32,0.007539199789365132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,7168,0.11496213277180989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,8192,0.1298357327779134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,10240,0.164628267288208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,12288,0.19837867418924968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,6144,0.09919892946879069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,5120,0.08264746665954589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,4096,0.06848426659901938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,16384,0.25762346585591633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,3584,0.060568531354268394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2560,65536,1.7001856486002604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,2048,0.03547733227411906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,1024,0.01981333295504252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,2560,0.04295146862665812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,3072,0.051514665285746254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,1536,0.027801599105199176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,768,0.01644053359826406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,256,0.008851200342178345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,128,0.0070826664566993715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,64,0.006169599791367849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,512,0.012612266341845193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,32,0.0063360000650088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,7168,0.07484266757965088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,8192,0.08639146486918131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,10240,0.10560746987660725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,12288,0.12480213642120361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,5120,0.05542399883270264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,6144,0.06449173291524252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,2048,65536,1.3102923075358073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,4096,0.0445248007774353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,16384,0.17026559511820477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,3584,0.03885333140691121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,2048,0.024178133408228556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,1024,0.013983999689420065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,768,0.01157973309357961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,1536,0.018866133689880372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,3072,0.03375786542892456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,2560,0.028930133581161498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,512,0.009204266468683879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,256,0.006905599931875865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,64,0.005100800096988678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,128,0.005738666653633118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,32,0.005294933418432872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,7168,0.059537065029144284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,8192,0.06627839803695679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,10240,0.08268906275431315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,12288,0.10079680283864338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,16384,0.12975040276845295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1536,65536,1.066528002421061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,6144,0.051507198810577394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,5120,0.0424778660138448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,4096,0.0343392014503479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,3584,0.030550400416056316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,3072,0.026690133412679035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,1536,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,1024,0.011342933773994446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,2560,0.02291733423868815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,2048,0.01853013237317403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,768,0.009437867005666097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,512,0.0076896001895268755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,64,0.004517333209514618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,128,0.005025066435337067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,256,0.005891199906667074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,32,0.004665599763393402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,7168,0.04208639860153198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,8192,0.04649279912312825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,10240,0.05811200141906738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,12288,0.07097813288370768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,1024,65536,0.6510570526123047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,16384,0.0908672014872233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,6144,0.03633280197779338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,4096,0.02400533358256022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,5120,0.029972267150878907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,3584,0.021215999126434328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,768,0.00730560024579366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,1024,0.00843519965807597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,2560,0.016030933459599814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,3072,0.01884373426437378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,2048,0.013229866822560629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,1536,0.010909866293271382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,128,0.004347733159859975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,512,0.00614933321873347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,256,0.004970666766166687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,32,0.004153600086768469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,64,0.003958400090535482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,7168,0.0281056006749471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,8192,0.03408746719360352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,10240,0.03772799968719483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,12288,0.04367893139521281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,768,65536,0.5118048032124837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,16384,0.05420159896214803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,6144,0.023795199394226075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,5120,0.02138239940007528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,4096,0.0188810666402181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,3584,0.016302933295567833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,768,0.005606399973233541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,3072,0.013516799608866373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,1024,0.006212266782919565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,2560,0.01176106631755829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,2048,0.009333333373069764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,1536,0.008213333288828532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,64,0.0033418667813142145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,512,0.004775466521581014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,128,0.003610666592915853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,256,0.004013866682847341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,32,0.003542399903138479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,7168,0.019108267625172932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,8192,0.022910932699839272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,10240,0.02980479995409648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,12288,0.033524266878763836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,512,65536,0.3588266690572103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,16384,0.041818666458129886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,6144,0.016489600141843162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,5120,0.011512533823649088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,3584,0.009423999985059103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,4096,0.00976213316122691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,3072,0.008257066706816356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,1024,0.00472320020198822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,768,0.004310399790604909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,2560,0.00713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,1536,0.005595733225345611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,2048,0.00631573349237442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,256,0.003491200009981791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,512,0.003869866579771042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,64,0.0030794667700926462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,128,0.0032149332265059153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,32,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,7168,0.017887999614079796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,8192,0.02440746625264486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,10240,0.028429865837097168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,12288,0.032152533531188965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,256,65536,0.19464000066121417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,16384,0.0406335989634196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,4096,0.008574933807055155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,6144,0.014763733744621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,5120,0.009912533561388652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,3584,0.00800960014263789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,3072,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,2048,0.006181333462397257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,2560,0.006888533135255177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,512,0.003671466559171677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,768,0.004093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,1024,0.004533333579699198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,1536,0.0053951998551686605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,256,0.0033471999069054925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,128,0.003156266609827677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,64,0.0029567999144395193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,32,0.0029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,7168,0.019732266664505005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,8192,0.022937599817911783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,10240,0.025849600632985432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,12288,0.029921066761016846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,128,65536,0.15447146097819012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,16384,0.03798720041910807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,6144,0.01616213321685791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,5120,0.011499733726183573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,4096,0.010780800382296245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,3584,0.00944640040397644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,3072,0.008629332979520161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,768,0.0037930667400360107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,1024,0.004136533290147781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,1536,0.004824533561865489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,2048,0.005420800050099691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,2560,0.006061866879463196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,512,0.00342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,256,0.003253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,128,0.003014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,64,0.002796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,32,0.0028885332246621448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,64,65536,0.14297173817952474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,6144,2.2223838806152343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4096,32,65536,0.13487253189086915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,7168,2.544342295328776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,8192,2.905517832438151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,5120,1.735873031616211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,10240,3.694812774658203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,3584,1.2404159545898437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,3072,1.0684981028238931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,2048,0.6990250905354818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,4096,1.448404312133789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,2560,0.9100234349568685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,1536,0.5381493250528971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,128,0.10067093372344971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,256,0.13726186752319336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,64,0.09480746587117514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,32,0.09528533617655435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,512,0.20882986386617025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,768,0.29073705673217776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,1024,0.36961708068847654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,12288,4.4309534708658855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,7168,0.5928223927815754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,6144,0.5225920041402181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,8192,0.68133544921875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,10240,0.838587760925293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,12288,1.034440549214681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,65536,16384,5.909149678548177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,4096,0.35087254842122395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,5120,0.42772480646769206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,3584,0.3055605252583822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,3072,0.2600938638051351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,2560,0.22520000139872232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,16384,1.3957215627034505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,256,0.03552853266398112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,2048,0.18003519376118976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,512,0.05496853192647298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,128,0.029633067051569623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,768,0.07582933108011881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,1024,0.09520320097605386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,1536,0.13825920422871907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,64,0.025465599695841473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,32,0.028034132719039918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,7168,0.45688425699869795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,6144,0.38486719131469727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,8192,0.5164949417114257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,10240,0.6314847946166993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,12288,0.7610325495402018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,4096,0.260318930943807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,5120,0.3296330769856771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,3584,0.23242559432983398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,3072,0.19688426653544108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,16384,1.0129269282023112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,1024,0.07402773698170981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,512,0.043338668346405027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,2560,0.16653226216634115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,1536,0.10523413022359211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,2048,0.1342367966969808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,768,0.05751573244730631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,128,0.021625600258509316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,256,0.028062933683395387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,32,0.021677867571512858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,64,0.02074986696243286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,7168,0.3788672129313151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,8192,0.43937279383341477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,10240,0.5422890981038411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,12288,0.640180269877116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,5120,0.27794132232666013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,16384,0.8473418553670248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,6144,0.32609599431355796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,4096,0.22324585914611816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,3584,0.1934165318806966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,1024,0.06276799837748209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,1536,0.08997440338134766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,3072,0.17101866404215496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,2048,0.11580159664154052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,768,0.050419199466705325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,2560,0.14124266306559247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,512,0.0378602663675944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,256,0.026418133576711016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,64,0.016708266735076905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,128,0.01990399956703186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,32,0.01704853375752767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,8192,0.33919572830200195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,10240,0.4247125307718913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,12288,0.5056991895039876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,16384,0.6876682917277018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,5120,0.2152405261993408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,7168,0.2976565361022949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,6144,0.25831359227498374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,16384,65536,5.973451741536459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,4096,0.17835839589436847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,3584,0.15364586512247722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,3072,0.13284160296122233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,2560,0.11290559768676758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,2048,0.0943061351776123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,1536,0.0714741309483846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,1024,0.05000426769256592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,512,0.030855466922124226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,256,0.020947200059890748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,768,0.04049280087153117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,64,0.01378666659196218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,128,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,32,0.01439466675122579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,12288,65536,4.390665690104166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,8192,0.2975381215413412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,10240,0.37774826685587565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,7168,0.2620394706726074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,12288,0.44151465098063153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,16384,0.5870496114095052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,3584,0.13147626717885336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,5120,0.19153599739074706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,4096,0.14951465924580892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,6144,0.22477760314941406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,10240,65536,3.686522674560547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,2560,0.09437653223673502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,1024,0.04147093296051026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,3072,0.11272532939910888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,2048,0.07829013665517172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,1536,0.05901439984639486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,768,0.032740267117818196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,64,0.012735999623934426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,512,0.023733333746592204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,256,0.01574399967988332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,128,0.013434666395187377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,32,0.013147733608881631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,7168,0.22325119972229004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,8192,0.25514559745788573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,10240,0.3193183898925781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,12288,0.3800373395284017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,8192,65536,2.910765838623047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,3584,0.11619626681009929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,16384,0.5165002822875977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,5120,0.16242772738138836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,4096,0.13230613072713215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,6144,0.19238613446553549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,2560,0.08501866658528646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,3072,0.10405759811401367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,1536,0.05367253224054972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,2048,0.0704639991124471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,1024,0.03931306600570679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,768,0.031956267356872556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,512,0.024503467480341594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,64,0.011329066753387452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,128,0.013270399967829385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,256,0.01743146578470866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,32,0.01176106631755829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,7168,0.18861440022786458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,8192,0.21718079249064126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,10240,0.26472427050272623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,12288,0.32529172897338865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,16384,0.42221546173095703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,4096,0.10946133136749267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,3584,0.09872639973958333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,5120,0.13791999816894532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,6144,0.16233280499776204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,7168,65536,2.5044522603352863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,3072,0.08368213176727295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,2560,0.07041172981262207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,2048,0.05858026742935181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,1024,0.032034132877985635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,1536,0.04525760014851888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,768,0.025995733340581258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,512,0.019678932428359986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,128,0.010072533289591472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,256,0.013522133231163025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,64,0.00860693355401357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,32,0.008933333555857341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,7168,0.15110613505045573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,8192,0.16982399622599284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,10240,0.20953280131022134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,12288,0.25742293993632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,16384,0.3331370671590169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,3584,0.07822186946868896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,6144,65536,2.1488416035970053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,4096,0.08681920369466146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,5120,0.10801599820454914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,6144,0.1302687962849935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,3072,0.06622080008188883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,2560,0.05706026554107666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,1536,0.03546559810638428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,2048,0.046291200319925944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,1024,0.025525333484013875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,768,0.020691200097401937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,256,0.01095360020796458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,512,0.016034133235613503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,128,0.008350933591524761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,64,0.007287466526031494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,32,0.007673599819342296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,7168,0.14174399375915528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,8192,0.1603648026784261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,10240,0.19877546628316242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,12288,0.23816960652669272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,5120,65536,1.7822186787923175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,16384,0.32342185974121096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,3584,0.07300586700439453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,4096,0.0824512004852295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,5120,0.1021386702855428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,6144,0.12233920097351074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,3072,0.06413760185241699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,2048,0.044265600045522054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,1536,0.034594134489695234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,2560,0.05442986488342285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,1024,0.025077333052953083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,768,0.020398932695388793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,256,0.010739200313886007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,512,0.015666133165359496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,64,0.007286400099595388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,128,0.008089600006739299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,32,0.007361066838105519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,7168,0.1155776023864746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,8192,0.13115306695302326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,10240,0.16730772654215495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,4096,65536,1.3865557352701823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,12288,0.19443413416544597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,16384,0.25907626152038576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,4096,0.06883306503295898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,5120,0.08343466917673746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,6144,0.09977813561757407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,3584,0.061492268244425455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,3072,0.0514357328414917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,2048,0.03577599922815959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,2560,0.04323413372039795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,1536,0.02762986620267232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,1024,0.020142932732899986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,768,0.0167413334051768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,512,0.012662399808565775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,256,0.008805333574612936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,128,0.0070165331164995836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,64,0.006101333101590474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,32,0.006268799801667531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,10240,0.13099093437194825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,7168,0.09311146736145019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,8192,0.10540160338083904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,12288,0.15997014045715333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3584,65536,1.276693344116211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,16384,0.209006929397583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,4096,0.05475093523661295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,6144,0.08041386604309082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,3584,0.04944106737772624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,5120,0.06770666440327963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,3072,0.043501865863800046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,1024,0.017607466379801432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,1536,0.0236735999584198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,768,0.014138666788736978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,512,0.011059199770291645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,2560,0.03643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,2048,0.029769599437713623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,256,0.008132266501585644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,128,0.006584533552328746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,32,0.006042666733264923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,64,0.005736533304055532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,7168,0.07408959865570068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,10240,0.10394026438395183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,8192,0.0856661319732666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,3072,65536,1.0355861028035482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,12288,0.1273760000864665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,16384,0.1640714645385742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,5120,0.053804798920949304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,6144,0.0640554666519165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,3584,0.039350398381551105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,4096,0.04455999930699666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,3072,0.03419093290964763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,512,0.009211732943852743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,768,0.0116074671347936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,2560,0.028860799471537274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,1024,0.014152533809343972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,2048,0.024151466290156045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,1536,0.019258666038513183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,256,0.006931200126806895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,128,0.005690666536490122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,32,0.005339733262856802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,64,0.0050335998336474095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,7168,0.057471998532613124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,8192,0.06602346499760946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,12288,0.09915946324666342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,10240,0.08268053531646728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2560,65536,0.8422869364420572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,16384,0.128439466158549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,4096,0.0346399982770284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,6144,0.04985599915186564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,3584,0.03038826584815979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,3072,0.026979200045267743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,5120,0.04239786863327026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,2048,0.01925440033276876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,2560,0.022788266340891518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,512,0.007821866869926452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,768,0.009429333607355754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,1536,0.01530239979426066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,1024,0.011199999849001567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,256,0.005940266450246175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,64,0.004538666705290476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,128,0.005016533533732096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,32,0.004681600133577982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,12288,0.06808319886525473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,7168,0.040965334574381514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,2048,65536,0.6505098978678385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,8192,0.0452565352121989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,10240,0.056270933151245116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,16384,0.08998933633168539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,6144,0.03565226793289185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,4096,0.02376426657040914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,3584,0.02086506684621175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,5120,0.029700267314910888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,3072,0.018554667631785073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,2048,0.01344000001748403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,2560,0.01595626672108968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,1024,0.008330666522185007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,1536,0.010796800255775452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,768,0.007181866466999054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,512,0.006093866626421611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,256,0.0048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,64,0.003802666564782461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,128,0.0042346666256586705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,32,0.004053333401679992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1536,65536,0.5054890632629394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,7168,0.03128746747970581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,8192,0.03629973332087199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,12288,0.0538645346959432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,10240,0.04392213424046834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,6144,0.02704319953918457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,16384,0.06942293643951417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,3584,0.01698453426361084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,3072,0.014588800072669984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,4096,0.01845653255780538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,5120,0.02241706649462382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,2560,0.012750933567682901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,2048,0.01069546639919281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,1536,0.008962133526802063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,1024,0.006962133447329204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,512,0.005246933301289876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,256,0.00436160018046697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,768,0.00613973339398702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,64,0.0035605333745479585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,128,0.003902933249870936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,32,0.0037077332536379496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,1024,65536,0.33809067408243815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,7168,0.024710400899251302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,10240,0.032580266396204635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,8192,0.030041599273681642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,12288,0.040777599811553954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,16384,0.05095359881718954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,6144,0.02121493419011434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,4096,0.016938666502634682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,5120,0.018576000134150186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,3584,0.013981866836547851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,3072,0.011338667074839274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,2048,0.008437333504358928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,2560,0.01083733340104421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,1536,0.007549866537253062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,768,0.0053247998158137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,1024,0.00581333339214325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,512,0.004598399996757508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,256,0.003934933245182038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,128,0.0035605333745479585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,64,0.003319466610749563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,32,0.0035146666069825493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,768,65536,0.26959253946940104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,8192,0.0199178675810496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,7168,0.015172266960144043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,10240,0.024035199483235677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,12288,0.027708800633748372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,6144,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,16384,0.03130666613578796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,3584,0.008943999807039898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,5120,0.012008532881736755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,4096,0.009564800063769023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,3072,0.007853866616884867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,2560,0.00729066679875056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,1536,0.005541333556175232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,2048,0.006323199967543285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,1024,0.004732800026734671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,768,0.004341333111127218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,512,0.003885866701602936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,128,0.0032106667757034303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,256,0.0035125332574049628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,32,0.0031989333530267083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,64,0.0030517332255840302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,512,65536,0.17863787015279134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,8192,0.013986133535703025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,7168,0.011935999989509583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,10240,0.015849600235621132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,12288,0.02063680092493693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,6144,0.010731732845306397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,16384,0.025161600112915038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,3072,0.008162133395671844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,4096,0.010455466310183207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,3584,0.009413333733876546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,5120,0.010246400038401287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,2048,0.006118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,1536,0.005576533575852713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,2560,0.006965333223342895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,768,0.004238933324813843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,512,0.0037418665985266366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,1024,0.004654933512210846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,128,0.0031040000418821974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,32,0.0029898665845394133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,256,0.0033461332321166994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,64,0.002932266642649968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,256,65536,0.1048629363377889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,7168,0.01042133371035258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,6144,0.009621333082516987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,8192,0.011252267162005107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,12288,0.018257067600886027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,10240,0.013172266880671182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,16384,0.022832000255584718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,3072,0.007085866729418437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,2560,0.006448000172773997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,4096,0.008178133269151051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,3584,0.0074538667996724445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,5120,0.008845866719881693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,1536,0.005143466591835022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,2048,0.005627733469009399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,128,0.0029098667204380036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,128,65536,0.07845333417256674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,256,0.003221333275238673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,1024,0.004221866528193155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,64,0.0027488000690937043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,768,0.0038677332301934562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,512,0.0034634667138258614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,32,0.002841600030660629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,6144,0.008900266885757447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,7168,0.009758933385213216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,8192,0.010172800223032633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,10240,0.01099733312924703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,12288,0.016637866695721946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,64,65536,0.0734773317972819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,16384,0.02183039983113607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,5120,0.00829013337691625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,3584,0.00730453332265218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,4096,0.007451733450094859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,3072,0.0068234667181968685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,2560,0.006076799829800924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,1024,0.0041407999893029535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,512,0.003483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,2048,0.005413333574930826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,1536,0.004818133513132731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,768,0.003806933263937632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,64,0.0028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,256,0.0031040000418821974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,128,0.002902399996916453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,32,0.002869333326816559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2048,32,65536,0.07006186644236247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,5120,0.9003946940104166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,6144,1.1035285949707032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,8192,1.4662314097086588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,7168,1.2775797526041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,4096,0.7182687759399414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,3584,0.6266464233398438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,1536,0.2822112083435059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,3072,0.5539701461791993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,2048,0.37479146321614587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,2560,0.45535999933878585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,10240,1.7781450907389325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,128,0.053409067789713535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,64,0.05324693520863851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,1024,0.19419412612915038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,256,0.06632213195164999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,32,0.04921386639277141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,768,0.1528213342030843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,512,0.109279998143514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,12288,2.18920415242513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,7168,0.3092693328857422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,8192,0.3456927935282389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,10240,0.42772159576416013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,6144,0.26114986737569174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,12288,0.5190047899881999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,3584,0.15544212659200032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,4096,0.17716906865437826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,5120,0.22341653505961098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,65536,16384,3.06442133585612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,16384,0.6784053166707357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,3072,0.1336352030436198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,2560,0.11572053432464599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,768,0.04185386498769124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,2048,0.09236053625742593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,1536,0.07165119647979737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,1024,0.05157866477966309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,256,0.021808000405629475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,512,0.031922133763631184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,64,0.013951999942461648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,128,0.016687999169031777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,32,0.014212266604105631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,6144,0.1949504057566325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,7168,0.2300053278605143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,8192,0.26324373881022134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,10240,0.3193162600199381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,12288,0.37971626917521156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,5120,0.1636192003885905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,3584,0.11944533189137777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,4096,0.1346570650736491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,3072,0.10149119695027668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,16384,0.520360533396403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,1536,0.05524906714757284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,2048,0.07017280260721842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,1024,0.03966079950332642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,768,0.032178133726119995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,2560,0.08556053638458253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,512,0.02491413354873657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,256,0.017479467391967773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,128,0.013459199666976928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,64,0.011187199751536052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,32,0.011817600329717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,7168,0.18941973050435384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,8192,0.21997226079305016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,10240,0.2690666516621908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,12288,0.3293600082397461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,16384,0.43792212804158526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,4096,0.11119039853413899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,3584,0.0999882698059082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,5120,0.1398431936899821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,6144,0.1633855978647868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,1536,0.046166400114695236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,2560,0.07168213526407877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,1024,0.03211519916852315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,2048,0.059752531846364344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,3072,0.08680960337320963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,128,0.010219732920328777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,512,0.01963520050048828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,256,0.013749333222707114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,768,0.025753599405288697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,64,0.008719999591509502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,32,0.008980266253153483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,8192,0.17503573099772135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,10240,0.21845013300577798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,12288,0.258734925587972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,7168,0.15393385887145997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,16384,0.3372874577840169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,16384,65536,2.9198954264322916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,6144,0.1328778664271037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,5120,0.11091840267181396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,4096,0.09010026454925538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,3584,0.0776426633199056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,3072,0.06859839757283528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,1536,0.03672639926274617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,2560,0.05840746561686198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,768,0.020985599358876547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,1024,0.025729066133499144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,2048,0.04744960069656372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,12288,65536,2.1302752176920574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,512,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,128,0.008449066678682964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,256,0.011374933520952861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,64,0.0072970668474833175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,32,0.007608533402283986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,7168,0.14589440027872722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,8192,0.16188799540201823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,10240,0.20679999987284342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,12288,0.24149972597757974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,16384,0.32731199264526367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,10240,65536,1.7926624298095704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,6144,0.1258560021718343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,5120,0.10302826563517253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,3072,0.06532266537348429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,3584,0.07384533087412516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,4096,0.08548053105672201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,2560,0.0546175996462504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,1024,0.025301333268483477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,768,0.020474666357040407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,1536,0.03468266725540161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,2048,0.04483199914296468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,512,0.015662933389345803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,128,0.008441600203514098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,256,0.010860799749692281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,64,0.006961066524187725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,32,0.007410133381684621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,8192,65536,1.396499252319336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,8192,0.1333066701889038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,7168,0.11643199920654297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,10240,0.16422932942708332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,12288,0.20196587244669595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,6144,0.10190186500549317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,16384,0.26903680165608723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,3072,0.053155199686686194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,2560,0.044097065925598145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,4096,0.06831040382385253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,5120,0.08494719664255777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,3584,0.060105601946512856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,1024,0.020204800367355346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,2048,0.036906667550404865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,1536,0.0283242662747701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,768,0.01648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,512,0.012890666723251343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,256,0.008806399504343669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,128,0.00697813332080841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,64,0.006067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,32,0.006262399752934774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,7168,0.09676907062530518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,8192,0.11017706394195556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,10240,0.13368959426879884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,12288,0.1583829402923584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,7168,65536,1.311915715535482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,4096,0.05559680064519247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,16384,0.21176320711771646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,6144,0.08349653085072836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,5120,0.07040533224741617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,2560,0.036423468589782716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,2048,0.030004266897837324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,3584,0.04952213366826375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,3072,0.043703468640645345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,1536,0.02409279942512512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,1024,0.017723733186721803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,768,0.014620799819628397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,512,0.011477333307266236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,64,0.005861333509286245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,256,0.008150400221347808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,128,0.006646400193373363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,32,0.0060810665289560955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,7168,0.07742186387379965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,8192,0.08596266905466715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,12288,0.12921493053436278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,10240,0.10544640223185223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,4096,0.0453877329826355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,6144,65536,1.0677973429361978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,16384,0.16753600438435873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,5120,0.05518613258997599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,3584,0.0396234671274821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,6144,0.06677546501159667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,3072,0.0347978671391805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,2048,0.024253867069880166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,2560,0.029999999205271403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,512,0.009382399916648864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,768,0.012077866991360982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,1536,0.01959999998410543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,1024,0.014659200112024942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,64,0.005122133096059163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,256,0.007006933291753133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,32,0.005367466807365417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,128,0.005896533528963724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,5120,65536,0.8351317087809245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,8192,0.08043093681335449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,7168,0.07004373073577881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,12288,0.11834666728973389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,10240,0.09692800045013428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,16384,0.151911465326945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,3584,0.03555733362833659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,3072,0.030110933383305866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,4096,0.04017920096715291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,6144,0.05962986548741659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,5120,0.050514133771260586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,2048,0.021334399779637657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,2560,0.025681066513061523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,1536,0.016660267114639284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,256,0.005701333284378052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,1024,0.012341333429018657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,768,0.009950932860374451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,512,0.007820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,128,0.005128533144791921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,32,0.005323733389377594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,64,0.004878933231035868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,4096,65536,0.6624949137369792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,7168,0.05868373314539591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,6144,0.05125120083491007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,8192,0.06781333287556966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,10240,0.08204800287882487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,12288,0.10106453100840251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,4096,0.03466879924138387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,16384,0.13005226453145344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,3584,0.03123840093612671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,5120,0.04275519847869873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,3072,0.0271178662776947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,2560,0.023124267657597862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,1024,0.011821867028872172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,2048,0.01923946738243103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,768,0.009764267007509868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,1536,0.015503999590873719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,512,0.007684266567230225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,128,0.005049600203831991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,256,0.00589333325624466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,64,0.004463999966780345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,32,0.004636799792448679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3584,65536,0.6215157190958659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,8192,0.05728533267974854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,7168,0.05057173172632853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,12288,0.08640960057576498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,6144,0.044733866055806475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,10240,0.07264533042907714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,5120,0.036820268630981444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,16384,0.11075733502705891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,4096,0.03020799954732259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,2048,0.01736533244450887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,3072,0.02385279933611552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,3584,0.027220267057418823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,2560,0.020510933796564736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,768,0.00929813285668691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,1024,0.010994133353233338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,1536,0.014233600099881491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,256,0.005729066828886667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,512,0.007684266567230225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,128,0.004887466629346212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,64,0.004331733286380768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,32,0.004572799801826477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,3072,65536,0.5246954600016276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,8192,0.04469653367996216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,7168,0.03960853417714437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,10240,0.05710826714833578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,12288,0.06612480084101359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,5120,0.02872640093167623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,6144,0.035556264718373615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,16384,0.08976319630940756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,4096,0.023451733589172363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,3584,0.021221333742141725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,2560,0.016123732924461363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,3072,0.01872533361117045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,1536,0.01102186640103658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,2048,0.013608533143997192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,1024,0.008378666639328004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,512,0.006097066899140676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,128,0.004311466713746389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,256,0.004936533172925314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,768,0.0071168000499407455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,64,0.0038058665891488397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,32,0.004056533426046371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2560,65536,0.43146346410115555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,8192,0.03570559819539388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,7168,0.030694399277369184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,10240,0.04435840050379435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,6144,0.02657173275947571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,12288,0.051703464984893796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,16384,0.06970453262329102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,2048,0.010831999778747558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,4096,0.018504534165064493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,5120,0.022699733575185142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,3584,0.016662399967511496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,2560,0.013005866607030233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,3072,0.014708266655604044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,1536,0.00888213316599528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,1024,0.00699946681658427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,256,0.004398933549722036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,768,0.006163200239340464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,64,0.0037450666228930154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,512,0.00526506652434667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,128,0.003920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,32,0.003701333453257879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,2048,65536,0.3376511891682943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,12288,0.03996373414993286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,8192,0.026231465737024943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,10240,0.032077866792678836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,7168,0.023196800549825033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,6144,0.01994346578915914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,16384,0.051025064786275234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,5120,0.018515199422836304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,4096,0.015470932920773825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,3072,0.011685333649317424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,3584,0.013803733388582864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,2560,0.010346666971842448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,1536,0.00732479989528656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,2048,0.008417066931724549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,768,0.005262933174769084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,128,0.0035605333745479585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1536,65536,0.26598933537801106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,1024,0.0058112000425656635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,512,0.004578133424123129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,256,0.0038954667747020722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,64,0.00329066663980484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,32,0.0034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,8192,0.023912533124287923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,7168,0.020317866404851278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,10240,0.02866133252779643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,16384,0.0394165317217509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,6144,0.016548267006874083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,12288,0.03143573403358459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,3072,0.009326933821042379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,2560,0.008387200037638346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,4096,0.013938132921854654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,2048,0.007053866485754649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,3584,0.011462400356928509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,5120,0.014332800110181173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,1536,0.006188799937566122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,1024,65536,0.17682347297668458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,64,0.00308693324526151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,1024,0.005103999873002371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,768,0.004645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,512,0.0040832000474135075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,256,0.003622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,128,0.0032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,32,0.0032479998966058097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,6144,0.01300266683101654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,7168,0.014625066518783569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,8192,0.020486400524775187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,10240,0.022461867332458495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,12288,0.024158932765324912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,16384,0.029269333680470782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,5120,0.011534933249155681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,4096,0.009853866696357728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,3584,0.009385599692662557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,3072,0.007826133569081625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,768,65536,0.1410304069519043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,2048,0.006276266773541768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,2560,0.007090133428573608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,256,0.003492266684770584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,1536,0.005628799895445505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,768,0.004297600189844767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,128,0.0031744000812371576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,512,0.0038954667747020722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,1024,0.004698666433493296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,32,0.003230933348337809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,64,0.0030165334542592366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,7168,0.012057600418726604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,6144,0.010710400342941285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,8192,0.013117866714795432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,12288,0.017454934120178223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,10240,0.0151829332113266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,16384,0.020616533358891805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,5120,0.01029973328113556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,512,65536,0.09563626448313395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,4096,0.00951039989789327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,3584,0.008130133152008057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,3072,0.0071050668756167095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,2560,0.007031466563542683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,2048,0.006303999821345012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,1536,0.005773866673310598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,1024,0.0048981333772341405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,32,0.003053866575161616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,256,0.0033333333830038703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,64,0.0029653333127498626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,512,0.0038399999340375268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,768,0.004343466460704803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,128,0.0030559999247392017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,7168,0.009155199925104777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,6144,0.008541867136955261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,10240,0.009648000200589497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,8192,0.009591466188430786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,16384,0.013648000359535218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,12288,0.01018773317337036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,256,65536,0.05486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,2560,0.006390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,3584,0.008150400221347808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,2048,0.005659733215967814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,4096,0.00755626658598582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,5120,0.008116266628106435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,3072,0.007657599945863088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,1024,0.004164266586303711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,1536,0.0050911997755368555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,32,0.0029525332152843474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,512,0.0035071998834609987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,768,0.0038677332301934562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,256,0.0031871999303499854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,128,0.002940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,64,0.0028394666810830434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,6144,0.007715199887752533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,7168,0.008270933230717977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,8192,0.008687999844551087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,128,65536,0.04327679872512817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,12288,0.009916800260543823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,10240,0.009258666634559631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,16384,0.011425066987673442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,3072,0.006903466582298279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,1536,0.005379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,2560,0.00619946668545405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,2048,0.005496533215045929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,3584,0.007264000177383423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,5120,0.0072629332542419435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,4096,0.006852266689141591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,768,0.004005333284536997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,1024,0.00439680020014445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,512,0.0035274667044480645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,256,0.0032672000428040824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,128,0.0030250666042168934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,64,0.0028309332827727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,32,0.00297173336148262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,6144,0.006647466619809468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,7168,0.006942933301130931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,16384,0.010479999581972758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,8192,0.007613866527875264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,12288,0.009274666508038838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,10240,0.008589866757392883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,64,65536,0.03982186714808146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,3584,0.007144533097743988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,2560,0.0060479998588562015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,3072,0.006806399921576183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,4096,0.006571733454863231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,5120,0.006977066894372304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,2048,0.005366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,1536,0.0047423998514811196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,1024,0.004002133260170618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,768,0.0036544000109036768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,32,0.002735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,512,0.003340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,64,0.0026528000831604003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,128,0.002796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,256,0.002980266759792964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1024,32,65536,0.03882666826248169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,4096,0.5330709457397461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,5120,0.6432863871256511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,6144,0.7844863891601562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,7168,0.9218922932942709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,8192,1.0239199956258138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,3584,0.4687466621398926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,3072,0.39412161509195964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,2560,0.3394389470418294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,2048,0.2772063891092936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,1024,0.14758720397949218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,10240,1.338921610514323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,1536,0.20825279553731285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,768,0.11379626592000325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,512,0.08452479839324951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,128,0.04127466678619385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,256,0.05179946819941202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,64,0.040421334902445476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,32,0.038320000966389975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,12288,1.6007797241210937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,7168,0.22608960469563805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,8192,0.25966293017069497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,10240,0.3281344095865885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,6144,0.19760640462239581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,12288,0.39144001007080076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,65536,16384,2.1384363810221356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,4096,0.13353813489278157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,5120,0.1635317325592041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,2560,0.08800000349680583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,2048,0.07164373397827148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,16384,0.516926924387614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,3584,0.12012159824371338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,3072,0.10193599859873455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,512,0.02469546596209208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,768,0.03238720099131266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,1536,0.055224533875783285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,256,0.017771732807159425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,128,0.013300266861915589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,64,0.01160426636536916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,1024,0.039485867818196616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,32,0.011827199657758077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,6144,0.14423573811848958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,7168,0.16673280398050944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,8192,0.19051520029703778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,10240,0.24282986323038735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,5120,0.12096746762593587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,12288,0.2873013178507487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,4096,0.09774293104807535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,3584,0.08658986886342367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,2560,0.06353813409805298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,16384,0.3770581245422363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,3072,0.07687359650929769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,2048,0.05280640125274658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,1024,0.02877333362897237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,768,0.023457066218058268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,1536,0.04020373423894246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,512,0.01832533280054728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,256,0.012810666362444559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,32,0.008690133690834045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,64,0.008286933104197185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,128,0.009744000434875489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,7168,0.1489781379699707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,10240,0.2065429369608561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,8192,0.16632426579793294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,12288,0.25378452936808266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,5120,0.10566720167795818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,16384,0.3305247942606608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,4096,0.08565119902292886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,6144,0.12836159865061442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,3584,0.07746666272481283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,3072,0.06559466520945231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,2048,0.045813333988189694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,2560,0.05605226755142212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,1536,0.03578026692072551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,1024,0.025621332724889118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,768,0.020510933796564736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,512,0.016639999548594155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,256,0.0132533331712087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,32,0.011244799693425496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,128,0.011206400394439698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,64,0.01087679962317149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,8192,0.13743467330932618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,12288,0.1985482692718506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,10240,0.16920533180236816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,16384,0.26506239573160806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,16384,65536,2.1543337504069013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,6144,0.10110399723052979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,7168,0.12068053086598714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,5120,0.08660799662272135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,4096,0.06866133213043213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,3072,0.05227200190226237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,3584,0.0604970653851827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,2560,0.045262932777404785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,1024,0.02044693430264791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,2048,0.03653653462727864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,12288,65536,1.5906667073567708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,1536,0.0281056006749471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,256,0.009108266234397889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,512,0.01309333344300588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,768,0.016693333784739174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,128,0.007342933118343354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,64,0.006457599997520447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,32,0.006696533163388569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,8192,0.11799039840698242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,7168,0.10244159698486328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,10240,0.14567467371622722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,10240,65536,1.301019795735677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,12288,0.1703210671742757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,16384,0.23322025934855142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,6144,0.0895242691040039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,5120,0.07455893357594809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,4096,0.06094186703364054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,3584,0.053275732199350986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,2048,0.031897600491841635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,3072,0.04535146554311116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,1536,0.02456000049908956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,2560,0.03938026825586955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,512,0.011142399907112122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,1024,0.01765973369280497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,256,0.007986133297284443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,768,0.014791466792424521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,128,0.007375999788443248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,32,0.006737066805362702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,64,0.006168533364931742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,8192,0.09735146363576254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,7168,0.0860042651494344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,8192,65536,1.0539978663126628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,10240,0.11894079844156902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,12288,0.14067519505818685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,4096,0.05046506722768148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,6144,0.07389439741770426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,5120,0.06261440118153891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,16384,0.1859242598215739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,3584,0.04420479933420817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,3072,0.0382698655128479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,2048,0.027383466561635334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,2560,0.032569599151611325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,1536,0.02153173287709554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,768,0.013485866785049438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,1024,0.016149333119392394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,256,0.0075989335775375364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,512,0.01032319962978363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,128,0.006356266637643178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,64,0.00544213354587555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,32,0.005810133119424184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,7168,65536,0.9112639745076498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,7168,0.07454826831817626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,8192,0.08610560099283854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,10240,0.10448853174845378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,12288,0.12407146294911701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,16384,0.16816213925679524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,2560,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,4096,0.044794666767120364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,5120,0.05440320173899332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,3072,0.03380053440729777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,6144,0.064846932888031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,3584,0.039026133219401044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,512,0.00941439966360728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,2048,0.024201599756876628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,1024,0.014659200112024942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,1536,0.01932906707127889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,256,0.007030400137106578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,768,0.012180266777674358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,128,0.0058218667904535925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,64,0.005709866682688395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,32,0.005692799886067709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,8192,0.0679807980855306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,7168,0.058789332707722984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,10240,0.08249920209248861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,6144,65536,0.7484373092651367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,12288,0.0977183977762858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,16384,0.13000746568044025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,4096,0.03463040192921956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,3584,0.0318015992641449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,3072,0.026800000667572023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,5120,0.04312853415807088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,6144,0.05164053440093994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,2048,0.01924906571706136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,2560,0.022911999622980753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,256,0.005980800092220307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,1024,0.0116757333278656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,512,0.007720533510049183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,1536,0.015379200379053751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,768,0.009724799791971843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,128,0.005093333125114441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,64,0.004457599918047587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,32,0.004717866579691568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,8192,0.05873599847157797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,5120,65536,0.6433162689208984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,7168,0.05163946549097696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,10240,0.0727189302444458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,12288,0.0867466688156128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,16384,0.11615040302276611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,6144,0.044921600818634035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,4096,0.03091946641604106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,5120,0.038224001725514725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,3584,0.02752959926923116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,3072,0.024251733223597208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,2048,0.017780266205469766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,2560,0.020950400829315187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,768,0.009367466966311137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,1536,0.01444906691710154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,256,0.006133333345254262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,512,0.00767146646976471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,1024,0.011329066753387452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,32,0.0051466668645540874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,64,0.004894933104515076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,128,0.005308799942334493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,4096,65536,0.5258570671081543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,12288,0.0742847998936971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,7168,0.045476265748341876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,8192,0.05081280072530111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,10240,0.06271040042241414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,6144,0.04032426675160726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,16384,0.09927573204040527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,3584,0.02376000086466471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,4096,0.027081600824991864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,3072,0.0206389327843984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,2048,0.015159466862678527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,5120,0.03254080017407735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,2560,0.018026665846506754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,1536,0.012434132893880208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,768,0.007851733267307282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,512,0.006604800124963124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,1024,0.009379200140635173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,256,0.0051701332132021586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,128,0.0044821331898371375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3584,65536,0.4592447916666667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,64,0.004073599974314371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,32,0.004238933324813843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,6144,0.03373759984970093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,8192,0.04553600152333577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,10240,0.05567040046056112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,7168,0.03911679983139038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,12288,0.06410986582438151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,4096,0.023578667640686037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,16384,0.08668373425801595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,5120,0.02840533256530762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,3072,0.018402133385340372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,3584,0.020938666661580403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,2048,0.013678933183352152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,2560,0.016065067052841185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,1024,0.008758399883906047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,1536,0.011322666207949321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,3072,65536,0.3827199935913086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,256,0.004936533172925314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,768,0.007326933244864146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,128,0.004267733295758565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,512,0.006062933305899302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,64,0.004002133260170618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,32,0.0040832000474135075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,7168,0.035419734319051106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,8192,0.040538668632507324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,6144,0.028537599245707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,16384,0.07200640042622884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,12288,0.057049600283304844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,10240,0.048844798405965166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,3072,0.016377600034077962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,5120,0.024919466177622477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,3584,0.019339734315872194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,4096,0.02123840053876241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,2560,0.012981333335240684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,2048,0.011202133695284526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,1536,0.009607467055320739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,1024,0.007035733262697856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,512,0.005248000224431356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,64,0.003589333345492681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,256,0.004347733159859975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,768,0.0061471998691558834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,128,0.003884800026814143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2560,65536,0.335038948059082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,32,0.003722666700681051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,7168,0.025860265890757246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,6144,0.022072533766428627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,10240,0.04015680154164632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,8192,0.028731733560562134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,12288,0.04581866661707561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,16384,0.058456532160441076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,2048,0.011336533228556316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,3072,0.012874666849772134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,5120,0.018978132804234823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,3584,0.014443733294804893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,4096,0.015828266739845276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,2560,0.012637866536776224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,1536,0.008975999553998311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,2048,65536,0.2607850710550944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,1024,0.00697920024394989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,128,0.0036309334139029183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,768,0.0060586666067441305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,256,0.00409706657131513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,512,0.005050666630268097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,64,0.0034986667335033415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,32,0.0034933333595593774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,8192,0.032015999158223465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,6144,0.02318506638209025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,7168,0.025809067487716674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,12288,0.03171306649843852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,10240,0.037367467085520426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,16384,0.039876266320546465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,4096,0.017540266116460167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,3584,0.014719999829928079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,5120,0.019777067502339683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,3072,0.012889599800109864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,2560,0.008476799726486206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,2048,0.007124266525109608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,1536,0.006298666695753734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1536,65536,0.20210026105244955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,1024,0.005331199864546458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,768,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,512,0.004240000247955322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,256,0.0036618667344252265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,64,0.0031669333577156065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,128,0.0033674667278925574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,32,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,8192,0.01867199937502543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,7168,0.01662613352139791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,12288,0.026873600482940675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,6144,0.0147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,10240,0.02400533358256022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,16384,0.033181866010030106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,5120,0.013126400113105775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,1536,0.00719893326361974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,3072,0.00909440020720164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,4096,0.01111466685930888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,3584,0.010321066776911417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,1024,65536,0.13738560676574707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,2560,0.009677867094675701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,2048,0.008611200253168742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,1024,0.005693866809209188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,768,0.004985600213209788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,256,0.0036085332433382668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,512,0.004311466713746389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,128,0.0032138665517171226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,64,0.00297173336148262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,32,0.0029834667841593427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,10240,0.018794665733973183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,7168,0.01428053379058838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,12288,0.01975040038426717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,8192,0.015756799777348836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,6144,0.012777599692344665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,16384,0.025385600328445435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,768,65536,0.10771839618682862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,1536,0.00555626650651296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,5120,0.011179733276367187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,2048,0.006279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,3584,0.008737066388130188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,4096,0.00955733358860016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,3072,0.007795199751853943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,2560,0.0071050668756167095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,1024,0.004678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,512,0.003835733234882355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,256,0.0034645333886146545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,768,0.0042805333932240805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,128,0.003126399964094162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,64,0.0029898665845394133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,32,0.002995199958483378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,512,65536,0.07549653053283692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,10240,0.013729066650072733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,6144,0.01030399998029073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,7168,0.011277866363525391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,8192,0.011975466211636861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,12288,0.01548799971739451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,16384,0.015134933590888976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,5120,0.009612799684206644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,2048,0.005726933479309082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,4096,0.00849173367023468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,3072,0.007156266768773396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,2560,0.006457599997520447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,3584,0.007968000074227651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,1536,0.00506986677646637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,1024,0.004279466470082601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,768,0.003982933362325033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,512,0.003549866626660029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,32,0.002918400118748347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,128,0.002921599894762039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,256,0.0031968000034491217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,64,0.0028597332537174227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,256,65536,0.04597226778666179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,6144,0.007639466722806294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,10240,0.009272533655166625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,7168,0.008026666442553202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,12288,0.009993599851926167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,8192,0.00840106705824534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,16384,0.011959466338157653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,5120,0.008572799960772197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,1024,0.004153600086768469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,2560,0.00624533345301946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,3584,0.00757120003302892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,2048,0.005555200080076853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,1536,0.004922666649023692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,3072,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,4096,0.007794133325417836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,768,0.0038133333126703895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,256,0.0031818665564060213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,128,0.002825599908828735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,512,0.0034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,64,0.00276799996693929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,32,0.0028266665836175283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,128,65536,0.03687573273976644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,5120,0.007159466544787089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,7168,0.007225599884986877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,8192,0.00751146674156189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,6144,0.006794666747252147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,10240,0.00820479989051819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,16384,0.00951039989789327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,12288,0.008923733234405517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,1536,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,4096,0.006694399813810985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,3584,0.007180800040562947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,2048,0.005426133175690969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,1024,0.004040533304214477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,3072,0.006729599833488464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,2560,0.006098133325576782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,768,0.0037119999527931214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,512,0.0033301333586374915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,256,0.003078400095303853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,32,0.0027797333896160126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,64,0.002749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,128,0.0028885332246621448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,64,65536,0.03335040012995402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,12288,0.0078005333741505934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,8192,0.00699946681658427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,6144,0.006621866424878438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,7168,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,10240,0.007269333302974701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,16384,0.008803199728329976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,5120,0.0072970668474833175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,4096,0.006837333242098491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,1536,0.0046858668327331545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,2048,0.005374933282534281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,3584,0.007221333185831706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,2560,0.006026666859785716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,3072,0.006858666737874349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,512,0.003369600077470144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,768,0.0036127999424934386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,1024,0.003995733211437861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,256,0.0029919999341169994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,128,0.0027850667635599775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,64,0.0026709333062171934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,32,0.0027978666126728057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,768,32,65536,0.03150826692581177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,4096,0.3535146713256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,5120,0.4407840092976888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,6144,0.5406869252522786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,7168,0.6140490849812825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,8192,0.7021013259887695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,3072,0.26989332834879554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,3584,0.3111680030822754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,2560,0.23388800621032715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,2048,0.18669226964314778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,1536,0.1452191988627116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,10240,0.885258674621582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,512,0.05919466813405355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,768,0.07850560347239176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,1024,0.10063893000284832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,256,0.037444265683492024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,128,0.030271999041239422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,12288,1.041642697652181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,64,0.028175999720891316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,32,0.02675519982973735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,6144,0.13626880645751954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,8192,0.17735466957092286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,7168,0.15683733622233073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,10240,0.21858132680257164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,12288,0.25933972994486487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,4096,0.08990399837493897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,3584,0.08078506787618002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,5120,0.11426346302032471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,3072,0.06907306512196859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,16384,0.34480533599853513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,2560,0.05844480196634928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,2048,0.04767466783523559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,65536,16384,1.419438934326172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,768,0.02132479945818583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,1536,0.03781439860661824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,128,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,256,0.011502933502197266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,1024,0.02714879910151164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,512,0.016394666830698647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,64,0.007286400099595388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,32,0.007692799965540569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,6144,0.10346133708953857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,7168,0.12210559844970703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,8192,0.13572907447814941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,10240,0.17373119990030925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,12288,0.20232106844584147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,4096,0.07161493301391601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,3584,0.061509335041046144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,16384,0.26823466618855796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,5120,0.08698773384094238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,3072,0.055086934566497804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,1024,0.020806399981180827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,2560,0.0455135981241862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,768,0.016839466492335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,2048,0.03780906597773234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,512,0.013140267133712769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,256,0.009264000256856282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,1536,0.02881493369738261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,128,0.007092266778151194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,32,0.006318933268388112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,64,0.006085333228111267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,8192,0.11370666821797688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,7168,0.09806613127390543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,10240,0.13787306149800618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,12288,0.16436692873636882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,16384,0.2184234619140625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,5120,0.07137173016866048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,6144,0.08643199602762858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,4096,0.05748586654663086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,3584,0.05074453353881836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,3072,0.044395732879638675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,2048,0.031203200419743855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,1536,0.024244266748428344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,2560,0.037638401985168456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,1024,0.01784106691678365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,512,0.011742933591206869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,768,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,256,0.008189866443475087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,128,0.00664213349421819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,32,0.00624533345301946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,64,0.005773866673310598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,8192,0.0880885362625122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,10240,0.1119381348292033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,12288,0.13082666397094728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,16384,0.17255786259969075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,6144,0.06864319642384847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,16384,65536,1.3938826243082683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,7168,0.07755413055419921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,5120,0.05685333410898844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,4096,0.04581760168075562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,3584,0.0412831981976827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,3072,0.035596799850463864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,2560,0.02993173400561015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,1536,0.019697066148122153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,2048,0.024965333938598632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,1024,0.014758400122324624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,64,0.005190399785836538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,12288,65536,1.0923552195231119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,768,0.012300800283749897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,512,0.009886933366457622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,256,0.006941866874694824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,128,0.005791999896367391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,32,0.005411200225353241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,7168,0.07191039721171061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,8192,0.08045972983042399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,10240,0.0990773359934489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,12288,0.11784319877624512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,10240,65536,0.8916117350260416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,6144,0.062293334801991784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,16384,0.155512539545695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,5120,0.05114986499150594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,4096,0.04122346639633179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,3584,0.036115201314290364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,3072,0.032657066980997726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,1536,0.017044266064961754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,2560,0.027038933833440144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,2048,0.021590399742126464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,1024,0.012616533041000366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,768,0.01046720047791799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,128,0.005072000126043955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,512,0.008332799871762593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,256,0.0056970665852228795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,32,0.005226666728655497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,64,0.004990933338801066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,8192,65536,0.6857450485229493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,6144,0.05298666556676229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,7168,0.05990720192591349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,8192,0.06869226296742757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,10240,0.08553280035654703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,12288,0.10387519995371501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,16384,0.13687465985616049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,5120,0.04397759834925334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,3584,0.031640533606211343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,3072,0.027753599484761554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,2560,0.023891200621922813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,4096,0.03548906644185384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,2048,0.01960106690724691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,128,0.005055999755859375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,1536,0.015682133038838704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,768,0.010205866893132527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,1024,0.011854933698972066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,512,0.007981866598129272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,256,0.00588266650835673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,64,0.004538666705290476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,32,0.004699733356634776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,7168,0.051734399795532224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,8192,0.05978133281071981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,10240,0.07200319766998291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,12288,0.08898239930470785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,7168,65536,0.6431498845418294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,16384,0.11420160134633381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,5120,0.03856213490168254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,6144,0.04490026632944743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,4096,0.03069973389307658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,3072,0.024919466177622477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,2560,0.020645334323247274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,3584,0.028198399146397907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,1536,0.014409599701563516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,2048,0.017571200927098594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,512,0.007570133109887441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,768,0.009765332937240601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,1024,0.01123413344224294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,256,0.005987200140953064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,128,0.00489279975493749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,64,0.0044383997718493145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,32,0.004578133424123129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,8192,0.04690986474355062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,6144,65536,0.5290421485900879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,10240,0.05665386517842611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,7168,0.04055893421173096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,12288,0.06919466654459636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,4096,0.024180267254511514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,16384,0.08832746346791585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,3584,0.021448532740275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,5120,0.030154667297999066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,6144,0.03588693141937256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,2560,0.016272000471750894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,3072,0.01903466582298279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,2048,0.013972266515096029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,1536,0.011509333054224651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,512,0.006037333110968272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,1024,0.00870293378829956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,768,0.007337599992752075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,256,0.004863999783992767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,32,0.004073599974314371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,64,0.0038442666331926978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,128,0.004315733412901561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,5120,65536,0.4489343961079915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,10240,0.050253868103027344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,8192,0.04068160057067871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,7168,0.03567999998728434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,16384,0.08102933565775552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,12288,0.061358932654062906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,6144,0.03191893299420674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,5120,0.026348799467086792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,4096,0.0214901328086853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,3072,0.01704746683438619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,3584,0.0191648006439209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,2560,0.014738133549690247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,2048,0.012688000003496805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,768,0.006791466474533081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,1536,0.010409599542617798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,1024,0.007863466441631318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,512,0.005676800012588501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,64,0.0037664001186688742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,256,0.004590933521588644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,128,0.00417493333419164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,32,0.004035199930270513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,4096,65536,0.3582506815592448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,7168,0.03155626654624939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,8192,0.03582293192545573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,12288,0.053396264712015785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,10240,0.045111465454101565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,6144,0.028142933050791425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,16384,0.06815679868062338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,5120,0.022997333606084188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,4096,0.01889066696166992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,3584,0.017072000106175742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,3072,0.015031466881434122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,2560,0.0130431999762853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,1536,0.00918293297290802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,2048,0.011310933033625285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,768,0.006223999957243601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,1024,0.007309866448243459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3584,65536,0.32170559565226237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,32,0.0038101332883040107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,512,0.0052490666508674625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,256,0.004392533500989278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,128,0.0038890667259693147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,64,0.0035861333211263022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,7168,0.026877866188685103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,6144,0.023181867599487305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,8192,0.031100799640019734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,10240,0.03854506810506185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,5120,0.01973973313967387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,12288,0.04573546648025513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,16384,0.059102932612101235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,4096,0.016292267044385276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,2560,0.011590400338172912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,3584,0.01472106675306956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,2048,0.010059733192125957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,3072,0.013219199577967324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,768,0.005764266848564148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,1024,0.0064085334539413456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,1536,0.008157866696516674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,3072,65536,0.27544854482014974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,256,0.004216533402601878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,512,0.004942933221658071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,128,0.003770666569471359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,64,0.003458133339881897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,32,0.0036544000109036768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,7168,0.02373440066973368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,6144,0.02123946746190389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,10240,0.03265173236529033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,12288,0.04050986766815186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,8192,0.030713599920272828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,16384,0.05128959814707438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,5120,0.018722132841746012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,3584,0.014122666915257773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,1536,0.0076789334416389465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,4096,0.017374932765960693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,3072,0.012578133742014566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,2048,0.009413333733876546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,2560,0.01159999966621399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2560,65536,0.2291872024536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,768,0.005212800204753875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,256,0.004012800008058548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,1024,0.005826133489608765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,128,0.0035594666997591654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,512,0.004548266530036926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,32,0.0035114665826161706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,64,0.0032501332461833954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,10240,0.02941653331120809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,7168,0.020309333006540933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,6144,0.01848213275273641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,8192,0.02519039909044902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,16384,0.04061439832051595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,12288,0.0320032000541687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,2560,0.009738666812578838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,4096,0.014393599828084311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,3584,0.012478933731714884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,5120,0.015742933750152587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,2048,0.00754559983809789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,3072,0.01036906639734904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,2048,65536,0.17946240107218425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,1536,0.0065760001540184024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,32,0.0032629333436489105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,512,0.004101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,1024,0.005272533496220907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,256,0.0035807999471823373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,768,0.004676266511281332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,128,0.0032970666885375976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,64,0.0031146667897701263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,7168,0.014564266800880432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,6144,0.013242666920026144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,8192,0.02044373353322347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,12288,0.023729066054026283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,10240,0.021041067441304524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1536,65536,0.13974827130635578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,16384,0.029267199834187824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,2048,0.0063360000650088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,3072,0.008141866823037466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,5120,0.011398399869600933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,4096,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,3584,0.009656533598899841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,2560,0.007142400244871776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,1536,0.00557226687669754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,1024,0.004714666803677877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,768,0.004388266801834106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,512,0.0038677332301934562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,256,0.00346666673819224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,128,0.003160533308982849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,64,0.0029866665601730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,32,0.0031082667410373688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,6144,0.012716799974441528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,1024,65536,0.09794879754384359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,8192,0.017941333850224814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,7168,0.014286933342615762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,16384,0.023858133951822916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,10240,0.018796799580256145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,12288,0.019323732455571493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,4096,0.009750399986902874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,5120,0.011175466577212016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,2560,0.007143466671307881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,3072,0.007985066870848339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,1536,0.005542399982611338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,2048,0.0062282666563987735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,3584,0.009079466263453167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,1024,0.004665599763393402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,768,0.004264533519744873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,512,0.00384853333234787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,256,0.0033887999753157297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,128,0.003124266614516576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,64,0.002940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,32,0.0029834667841593427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,768,65536,0.07492907047271728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,5120,0.0100490669409434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,12288,0.01565226713816325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,16384,0.019048533837000527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,7168,0.01225920021533966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,6144,0.011341866850852967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,8192,0.012983466188112894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,10240,0.014037332932154336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,4096,0.008872532844543457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,3584,0.007950933277606964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,3072,0.007030400137106578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,768,0.004328533510367076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,2560,0.0069567998250325514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,2048,0.006198399762312571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,1536,0.005555200080076853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,1024,0.004833066463470459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,128,0.0030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,512,0.0038101332883040107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,256,0.0033290666838486993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,64,0.0029386666913827257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,32,0.0029909332593282064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,512,65536,0.053106133143107095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,16384,0.01269760032494863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,12288,0.010924800237019857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,5120,0.008214400211970011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,8192,0.009760000308354696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,10240,0.009556266665458679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,6144,0.00862613320350647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,7168,0.009300266702969868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,3072,0.007234133283297221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,3584,0.0075647999842961625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,4096,0.007624533275763195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,2560,0.006563200056552887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,2048,0.005885866781075796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,1536,0.005172266562779745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,1024,0.004386133452256521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,768,0.003991466760635376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,512,0.0035584000249703727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,128,0.002974933385848999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,256,0.0031466667850812277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,64,0.002754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,32,0.0028864001234372456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,256,65536,0.034949334462483723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,12288,0.008893866340319316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,16384,0.01016426682472229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,10240,0.008327466746171314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,7168,0.008337066570917765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,8192,0.00872213343779246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,6144,0.0077450667818387345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,5120,0.007320533196131389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,4096,0.006867200136184692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,3584,0.00732479989528656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,2560,0.006275199850400289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,3072,0.006910933554172516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,2048,0.00556160012880961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,1536,0.0048767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,512,0.003554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,1024,0.004069333275159201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,768,0.0038250667353471124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,65536,0.028078933556874592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,256,0.003219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,128,0.002899199972550074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,64,0.0028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,128,32,0.002961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,12288,0.007975466549396515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,16384,0.008929066856702169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,7168,0.006913066903750102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,8192,0.00709440012772878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,10240,0.007485866546630859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,4096,0.006585599978764851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,5120,0.0070271998643875126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,6144,0.00660693347454071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,1536,0.004645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,3584,0.007068799932797749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,3072,0.0066890666882197065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,2560,0.006006399790445963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,2048,0.005334400137265523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,1024,0.003949866692225138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,768,0.0036544000109036768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,512,0.0032821332414944967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,65536,0.02441706657409668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,128,0.002792533238728841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,64,0.002678400029738744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,256,0.0029546665648619336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,64,32,0.002713600049416224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,12288,0.00703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,10240,0.007109333574771881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,7168,0.0072053333123524976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,8192,0.00688213308652242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,16384,0.007663999994595845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,6144,0.006620799998442332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,5120,0.007001600166161854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,4096,0.006649599969387054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,3584,0.007113599777221679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,3072,0.006783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,2560,0.006004266440868378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,1536,0.0046517332394917805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,2048,0.005320533116658529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,1024,0.004010666658480962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,512,0.003336533407370249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,768,0.0036309334139029183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,256,0.0030303999781608583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,128,0.0028042666614055633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,32,0.0026709333062171934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,65536,0.02253119945526123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,512,32,64,0.0026922665536403658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,4096,0.27334187825520834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,5120,0.3508960088094076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,6144,0.4159680048624675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,7168,0.47390934626261394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,8192,0.5437130610148112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,3584,0.2404618740081787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,3072,0.21561172803243003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,10240,0.6708992004394532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,2560,0.18109013239542643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,2048,0.1467957337697347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,512,0.047585066159566244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,64,0.021565866470336915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,256,0.034473601977030435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,1536,0.11444693406422932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,1024,0.0784991979598999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,768,0.06509333451588949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,128,0.025862399737040204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,32,0.02249493400255839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,12288,0.8273312250773112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,7168,0.12071146965026855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,8192,0.1377130667368571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,10240,0.17548799514770508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,6144,0.10810133616129558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,12288,0.2027029355367025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,5120,0.08749333222707113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,4096,0.07127040227254232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,3072,0.055657601356506346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,16384,0.27227519353230795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,3584,0.0641322652498881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,2048,0.03771946827570598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,2560,0.04585920174916585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,1536,0.029509333769480388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,256,0.009753599762916565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,768,0.0172160009543101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,65536,16384,1.063803736368815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,512,0.013214932878812155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,1024,0.02114773392677307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,128,0.007283199826876323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,32,0.006754133105278015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,64,0.006369066735108693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,7168,0.0879850705464681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,6144,0.0763871987660726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,8192,0.09950400193532308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,10240,0.12709866364796957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,12288,0.15188053448994954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,4096,0.05221013228098551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,5120,0.06404159863789877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,3072,0.040827735265096025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,3584,0.04564906756083171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,16384,0.1979039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,1024,0.016594133774439492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,1536,0.02211093306541443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,768,0.013762133320172629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,2560,0.0348469336827596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,2048,0.028138667345046997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,256,0.008016000191370647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,512,0.011098666985829671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,128,0.006333866715431213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,64,0.005601066847642263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,32,0.006010666489601135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,8192,0.08669226964314779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,10240,0.10813013712565105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,7168,0.08010773658752442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,12288,0.1291434685389201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,16384,0.1764458656311035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,5120,0.05635840098063151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,4096,0.0467306653658549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,3584,0.04028480052947998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,6144,0.06595413287480673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,3072,0.03659626642862956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,2560,0.030067199468612672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,1024,0.014793599645296732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,1536,0.019682133197784425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,2048,0.025510400533676147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,512,0.009990400075912476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,768,0.012475732962290447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,256,0.006972800195217133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,128,0.005791999896367391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,32,0.00580266664425532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,64,0.005584000051021576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,10240,0.08629013697306315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,8192,0.07178453604380289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,12288,0.1024469296137492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,16384,0.13493439356486003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,7168,0.06138773361841837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,16384,65536,1.1020618438720704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,6144,0.054176000754038486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,5120,0.04469226598739624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,3584,0.03218239943186442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,4096,0.036060798168182376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,3072,0.029174399375915528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,2560,0.024344533681869507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,1536,0.015797332922617594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,12288,65536,0.783791987101237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,2048,0.019754666090011596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,256,0.006061866879463196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,1024,0.01206826666990916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,768,0.010273067156473796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,512,0.008272000153859456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,128,0.005291733145713806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,32,0.0048767998814582825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,64,0.004715733230113983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,8192,0.061117867628733315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,7168,0.05377386808395386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,10240,0.07639146645863851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,12288,0.09286080201466879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,10240,65536,0.6805237452189128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,6144,0.04662079811096191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,16384,0.12134613196055095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,5120,0.03941973447799683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,4096,0.03253759940465291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,2560,0.021448532740275063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,3584,0.029394133885701494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,3072,0.0256661335627238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,2048,0.01832533280054728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,768,0.010067199667294819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,1024,0.011665067076683045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,256,0.00628053347269694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,512,0.007942399879296621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,64,0.004987733562787374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,1536,0.014797866344451904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,128,0.00544213354587555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,32,0.005269333223501841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,8192,65536,0.549453862508138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,7168,0.046854400634765626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,10240,0.06523520151774088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,8192,0.053870932261149085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,12288,0.07778560320536296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,6144,0.04186026652654012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,3072,0.021618133783340453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,16384,0.10548266569773357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,2560,0.018641066551208497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,3584,0.02516053318977356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,4096,0.027729066212972005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,5120,0.03424319823582967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,2048,0.015332266688346863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,1024,0.0098880002895991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,1536,0.012814933061599731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,256,0.005264000097910563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,768,0.0084906667470932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,64,0.004109866668780645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,512,0.00673280010620753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,128,0.004586666822433472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,32,0.004358399907747904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,8192,0.04550506671269734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,7168,0.04121706485748291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,10240,0.05612266858418783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,12288,0.06930879751841226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,7168,65536,0.46149654388427735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,16384,0.09086080392201742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,6144,0.03490560054779053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,3072,0.019099734226862588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,3584,0.02195733388264974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,5120,0.030305065711339313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,4096,0.024149332443873087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,2048,0.013861333330472311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,1024,0.009099733829498292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,2560,0.016390400131543477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,1536,0.011457066734631855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,768,0.007718400160471599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,512,0.006055466830730438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,128,0.004228266576925913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,256,0.0049685334165891016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,64,0.004220800101757049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,32,0.004362666606903076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,6144,65536,0.40207786560058595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,10240,0.045672531922658285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,12288,0.05591146548589071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,7168,0.03630613485972087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,8192,0.043858134746551515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,16384,0.0785045305887858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,6144,0.029000532627105714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,4096,0.022678399085998537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,5120,0.027013333638509114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,3584,0.01960106690724691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,3072,0.016335999965667723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,2560,0.01325973371664683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,1536,0.00960213343302409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,1024,0.007657599945863088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,2048,0.011325866977373759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,768,0.006296533346176148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,512,0.005261866748332978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,256,0.00439680020014445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,128,0.003894400099913279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,64,0.0035936000446478524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,32,0.0037962667644023894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,5120,65536,0.35303678512573244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,8192,0.034577067693074545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,7168,0.030797866980234782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,10240,0.04345386823018392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,6144,0.027331199248631793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,12288,0.05367679993311564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,16384,0.06968746980031332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,5120,0.022734934091567995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,3584,0.016706132888793947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,4096,0.018709333737691243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,3072,0.015051733454068503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,2560,0.01434346636136373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,1536,0.010334933797518413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,1024,0.007993599772453308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,2048,0.012417067090670269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,768,0.006434133152167003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,512,0.005560533205668131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,4096,65536,0.27824106216430666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,64,0.0034826666116714476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,256,0.00443200021982193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,128,0.004029866556326548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,32,0.0036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,7168,0.026423466205596925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,6144,0.023521065711975098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,10240,0.03768320083618164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,8192,0.029782400528589888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,12288,0.048664534091949464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,16384,0.05946026643117269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,5120,0.019402666886647543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,3584,0.0147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,4096,0.016614400347073875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,3072,0.013078400492668152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,2560,0.013464533289273582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,1024,0.0072405333320299785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,2048,0.011460266510645549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,1536,0.00949013332525889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,768,0.005985066791375478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,128,0.003643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,256,0.004082133372624716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,512,0.0050357331832249965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3584,65536,0.24131627082824708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,32,0.0034976000587145484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,64,0.0034400001168251038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,7168,0.023665066560109457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,6144,0.02061226765314738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,8192,0.027526400486628216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,10240,0.032214399178822836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,5120,0.017199999094009398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,12288,0.038762664794921874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,16384,0.0520799994468689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,4096,0.014216533303260804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,2560,0.010217600067456563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,2048,0.008941866954167684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,1536,0.007642666498819987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,3072,0.011436800161997478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,3584,0.012853333353996277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,1024,0.006164266665776571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,3072,65536,0.20572586059570314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,256,0.004002133260170618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,768,0.005423999826113383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,512,0.004729599754015604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,128,0.003626666714747747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,64,0.003302400062481562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,32,0.0034261333445707956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,6144,0.021948800484339396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,7168,0.024395734071731567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,12288,0.03313279946645101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,8192,0.029252266883850096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,10240,0.028856533765792846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,16384,0.047722665468851726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,3584,0.013680000106493631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,5120,0.018645334243774413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,4096,0.01571626663208008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,2560,0.009360000491142273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,3072,0.011934933066368104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,2048,0.008081066608428954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2560,65536,0.1832597255706787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,1536,0.006537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,1024,0.0053151999910672505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,128,0.003442133218050003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,512,0.004221866528193155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,768,0.004794666667779287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,256,0.0037258667250474296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,64,0.0032405334214369455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,32,0.003319466610749563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,6144,0.015475199619928996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,10240,0.02467733422915141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,8192,0.01949866612752279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,7168,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,12288,0.02794559995333354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,16384,0.03454506794611613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,5120,0.013548800349235534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,2048,65536,0.140938663482666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,4096,0.011603200435638427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,3584,0.010601600011189777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,2048,0.008046933511892954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,3072,0.00965119997660319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,2560,0.008932266632715862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,1536,0.006270933151245117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,1024,0.005125333368778229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,768,0.004633600016434988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,32,0.0031456001102924346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,256,0.0036320000886917113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,64,0.003054933249950409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,512,0.0040949332217375435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,128,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,12288,0.0249674657980601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,7168,0.014282666643460593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,6144,0.01300373375415802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,8192,0.015843199690183003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,10240,0.021579732497533165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,16384,0.027258666356404622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1536,65536,0.109553066889445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,5120,0.011310933033625285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,4096,0.009710933764775593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,2560,0.00726506660381953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,3584,0.008885332942008972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,3072,0.008232533435026805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,2048,0.006274133423964183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,1536,0.0056096002459526065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,1024,0.004625066618124644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,512,0.0038719999293486277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,256,0.003389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,768,0.00425493319829305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,32,0.0030229332546393077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,64,0.0029919999341169994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,128,0.0030773334205150605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,1024,65536,0.07682879765828451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,16384,0.021028266350428263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,10240,0.017190400759379068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,8192,0.01537493367989858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,12288,0.019287467002868652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,6144,0.013118933637936911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,7168,0.01418880025545756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,4096,0.009699199597040813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,5120,0.01214400033156077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,1024,0.004459733267625173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,3072,0.007979733248551685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,3584,0.00860693355401357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,2560,0.006868266562620799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,1536,0.005331199864546458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,2048,0.005947733422120413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,768,0.004098133246103922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,512,0.003732266773780187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,256,0.0033482665816942847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,128,0.003115733216206233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,32,0.002946133414904277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,64,0.00296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,768,65536,0.059860265254974364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,12288,0.015381333231925965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,16384,0.015614933768908181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,10240,0.014019200205802917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,5120,0.009727999567985535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,7168,0.011659733454386393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,8192,0.012384000420570373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,6144,0.010869333148002624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,4096,0.00848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,3584,0.00788266658782959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,3072,0.007152000069618225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,2560,0.0065184002121289565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,1536,0.005026133358478546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,1024,0.004281599819660187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,2048,0.005640533566474914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,768,0.0041461333632469176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,512,0.0035648000737031303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,128,0.0029909332593282064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,256,0.0032405334214369455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,65536,0.04351679881413777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,64,0.0028223998844623564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,512,32,0.0028778667251269023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,10240,0.00969599982102712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,12288,0.010409599542617798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,8192,0.008574933807055155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,7168,0.008059733112653096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,6144,0.007624533275763195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,5120,0.008563199639320373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,4096,0.00783253312110901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,16384,0.012006400028864543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,3584,0.0075882668296496075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,3072,0.007165866593519847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,1024,0.004149333387613296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,2560,0.006212266782919565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,2048,0.005526400109132131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,1536,0.004871466755867004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,128,0.002932266642649968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,512,0.0034186666210492453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,65536,0.03089066743850708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,768,0.004011733333269755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,256,0.00311253344019254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,64,0.0028042666614055633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,256,32,0.0028309332827727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,16384,0.00897920032342275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,10240,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,12288,0.008981333176294962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,7168,0.007253333429495494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,8192,0.007527466615041096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,5120,0.007204266885916392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,6144,0.006826666494210561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,4096,0.00674773355325063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,3584,0.007275733351707459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,3072,0.006810666620731353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,2560,0.006164266665776571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,768,0.0037600000699361167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,1536,0.004790399968624115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,1024,0.00405973345041275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,2048,0.0054666668176651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,64,0.0027562665442625684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,65536,0.024291199445724488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,256,0.003036800026893616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,128,0.002829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,512,0.003382399926582972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,128,32,0.0027818667391935987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,12288,0.006977066894372304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,8192,0.0069919998447100324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,7168,0.006984533369541168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,10240,0.0071370666225751235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,6144,0.006584533552328746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,5120,0.007014399766921997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,4096,0.006626133124033611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,16384,0.007747200131416321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,3072,0.0067221333583196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,3584,0.007213866710662842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,1536,0.004661333560943603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,1024,0.003978666663169861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,2560,0.005963733295599619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,2048,0.005303466816743215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,768,0.0036288000643253326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,65536,0.021757866938908896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,64,0.002701866626739502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,128,0.0028490667541821797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,512,0.0032885332902272543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,256,0.002977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,64,32,0.0026890667776266735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,10240,0.007333333293596904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,16384,0.00823466678460439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,5120,0.007045333087444305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,8192,0.006981333096822103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,6144,0.0066101332505544026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,7168,0.006816000243028005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,12288,0.007472000022729237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,4096,0.006614399949709575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,3584,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,768,0.0036320000886917113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,2560,0.006016000111897787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,3072,0.006705066561698914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,1536,0.004683733483155568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,2048,0.005333333214124044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,1024,0.003978666663169861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,65536,0.019205333789189656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,256,0.003044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,512,0.003320533285538355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,32,0.0027327999472618104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,128,0.002755200117826462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,384,32,64,0.002647466709216436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,4096,0.1949013392130534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,5120,0.23505706787109376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,6144,0.2841247876485189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,7168,0.33482774098714196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,8192,0.3712714513142904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,3072,0.14478933016459147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,3584,0.17146986325581867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,1536,0.08068053722381592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,2560,0.12421653270721436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,2048,0.10509973367055256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,128,0.018004266421000163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,256,0.024991999069849648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,768,0.045665065447489425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,64,0.01440000037352244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,512,0.033497599760691325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,1024,0.05683199961980184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,10240,0.46046826044718425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,32,0.0150026669104894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,12288,0.5702837626139323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,7168,0.08376533190409342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,8192,0.09592426617940267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,10240,0.11657386620839436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,12288,0.13907413482666015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,6144,0.07511253356933593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,4096,0.04862506786982219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,3072,0.039061331748962404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,16384,0.18879787127176922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,3584,0.043331201871236166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,5120,0.060718933741251625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,768,0.012972799936930337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,512,0.010652800401051838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,1024,0.015708800156911215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,1536,0.02107306718826294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,2560,0.032620799541473386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,2048,0.026387200752894087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,64,0.005094400048255921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,256,0.007916800181070964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,128,0.005858133236567179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,32,0.005573333303133646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,65536,16384,0.7425045649210612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,7168,0.06393280029296874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,6144,0.05623466571172079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,8192,0.07361173629760742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,10240,0.08984213670094808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,12288,0.11041493415832519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,5120,0.046274133523305255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,4096,0.03767253160476684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,3584,0.033557331562042235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,3072,0.02908693353335063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,16384,0.14133226076761882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,2560,0.02526719967524211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,1024,0.012427733341852824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,2048,0.020447999238967896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,768,0.010548266768455505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,1536,0.016178133090337117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,256,0.006072533130645752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,512,0.008788266777992248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,64,0.004457599918047587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,128,0.005060266455014547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,32,0.004840533435344696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,8192,0.06259413162867228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,7168,0.054148264726003016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,10240,0.07670933405558268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,12288,0.09084373315175374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,4096,0.03256746729214986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,16384,0.12328853607177734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,5120,0.03976426521937053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,6144,0.04753599961598714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,3584,0.02890133261680603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,3072,0.02553386688232422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,2560,0.02244266668955485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,1536,0.015034666657447815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,2048,0.01829013427098592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,1024,0.011704533298810323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,768,0.010191999872525533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,512,0.008481066425641377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,256,0.005737600227197012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,32,0.0046304002404212955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,128,0.00487360010544459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,64,0.004433066646258036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,8192,0.04912853240966797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,12288,0.07268799940745035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,10240,0.06085866689682007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,16384,0.09783999919891358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,16384,65536,0.7533898671468099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,6144,0.03769280115763347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,5120,0.031897600491841635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,7168,0.043132801850636796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,3072,0.02041920026143392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,4096,0.026436267296473186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,3584,0.02443306644757589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,12288,65536,0.582583491007487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,1536,0.011822932958602905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,2560,0.017484800020853678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,2048,0.014324266711870828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,1024,0.009452799956003826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,256,0.005015466610590617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,512,0.0068810666600863145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,768,0.008161066472530365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,128,0.0043712000052134195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,64,0.004023466755946477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,32,0.004301866888999939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,7168,0.03910719950993856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,8192,0.04326506853103638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,10240,65536,0.47727467219034836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,10240,0.053808001677195225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,12288,0.06627093156178793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,16384,0.08811413447062175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,4096,0.02490133245786031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,5120,0.028114134073257448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,3584,0.020796799659729005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,3072,0.018398932615915933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,6144,0.033598931630452473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,2048,0.012916266918182373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,1536,0.010730666915575664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,2560,0.01569706698258718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,1024,0.008586666981379191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,512,0.006019199887911478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,768,0.007594666878382365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,256,0.004699733356634776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,128,0.004101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,64,0.0038399999340375268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,32,0.003947733342647553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,8192,65536,0.3847029368082682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,6144,0.03102506597836812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,7168,0.033641600608825685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,8192,0.03950613339742025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,10240,0.046868268648783365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,12288,0.05646293163299561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,16384,0.07642347017923991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,5120,0.02565760016441345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,2560,0.013653332988421122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,1536,0.010220799843470256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,4096,0.022006400426228843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,3584,0.01901866594950358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,2048,0.011659733454386393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,3072,0.015875200430552162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,768,0.007050666709740956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,1024,0.007852800190448761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,256,0.004430933296680451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,512,0.005321600039800008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,32,0.003894400099913279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,128,0.003918933371702829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,64,0.003668266783157984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,7168,65536,0.3389535903930664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,7168,0.03207466602325439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,6144,0.026596266031265258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,8192,0.03872426748275757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,10240,0.04298346837361654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,12288,0.051179734865824375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,3072,0.015306666493415833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,16384,0.0720415989557902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,4096,0.020696532726287842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,2560,0.013447466492652892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,3584,0.01841813325881958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,2048,0.010950400431950887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,5120,0.023121066888173423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,1536,0.009408000111579894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,1024,0.007242666681607564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,256,0.004297600189844767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,768,0.005996799965699514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,512,0.005002666513125102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,128,0.0038143999874591826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,64,0.0035413332283496858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,32,0.003718400001525879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,7168,0.027033599217732747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,6144,65536,0.29519678751627604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,8192,0.030693332354227703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,10240,0.038839467366536456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,16384,0.05416640043258667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,12288,0.04341119925181071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,6144,0.02218559980392456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,5120,0.020282665888468422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,3584,0.015427199999491372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,2048,0.009621333082516987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,3072,0.013294933239618936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,2560,0.011080533266067505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,4096,0.01732906699180603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,768,0.005550933380921682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,1536,0.008505599697430928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,1024,0.006680533289909363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,512,0.004643199841181437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,64,0.003332266708215078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,256,0.003923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,128,0.003571200122435888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,32,0.0034901333351929987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,5120,65536,0.2440341313680013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,12288,0.03861226638158162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,7168,0.02390399972597758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,8192,0.026900267601013182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,6144,0.02241599957148234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,10240,0.03411519924799601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,16384,0.048501332600911454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,5120,0.01784320076306661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,4096,0.01452906628449758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,3072,0.011677866180737812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,3584,0.012997333208719888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,2560,0.010549333691596986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,2048,0.00909440020720164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,1536,0.007515733440717061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,4096,65536,0.19635945955912273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,1024,0.006144000093142191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,768,0.004920533299446106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,64,0.0032842665910720824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,128,0.0033791999022165934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,512,0.004315733412901561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,256,0.003756800045569738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,32,0.0034048000971476236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,6144,0.018348799149195353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,7168,0.02082560062408447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,8192,0.026693334182103474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,10240,0.02999040087064107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,12288,0.0348416010538737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,5120,0.016902399063110352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,16384,0.04327359994252523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,3072,0.010835199554761251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,2560,0.009806933005650838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,2048,0.008370133241017659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,4096,0.014762666821479798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,3584,0.012773332993189493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,1536,0.00710399995247523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3584,65536,0.1704576015472412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,512,0.004187733431657155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,768,0.004659200211366018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,1024,0.005373866856098175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,256,0.0036245333651701607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,128,0.0032810665667057036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,64,0.003065599997838338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,32,0.0033098667860031127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,7168,0.01840959986050924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,16384,0.042054398854573564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,12288,0.03175680041313171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,8192,0.02587626576423645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,6144,0.016059733430544534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,10240,0.027973333994547527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,4096,0.011752532919247945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,5120,0.013801599542299906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,3584,0.010813867052396137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,3072,0.009799466530481974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,2560,0.00886293351650238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,1536,0.0065311998128890995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,2048,0.007696000238259633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,3072,65536,0.1502218723297119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,128,0.0032000000278155005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,768,0.004388266801834106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,1024,0.004966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,512,0.003974399964014689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,256,0.0035264000296592714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,64,0.003050666550795237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,32,0.003172266731659571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,6144,0.013205333550771078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,7168,0.015485866864522298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,8192,0.021961599588394165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,12288,0.026317866643269856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,10240,0.024599466721216837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,16384,0.031404799222946166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2560,65536,0.13202026685078938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,5120,0.012222933769226074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,1536,0.005704533557097117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,3072,0.008393599589665731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,4096,0.010537599523862202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,3584,0.009315199653307597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,2560,0.00795839975277583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,2048,0.006588799754778545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,768,0.004404266675313314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,1024,0.004791466891765595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,512,0.003885866701602936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,256,0.0034495999415715536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,32,0.003190399954716364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,128,0.003206400076548258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,64,0.0030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,8192,0.01844586730003357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,7168,0.01447466711203257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,6144,0.012929067015647888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,10240,0.020553600788116456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,2048,65536,0.10337066650390625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,16384,0.025961599747339886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,12288,0.02108586629231771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,2560,0.00761599987745285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,5120,0.011397332946459452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,2048,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,1536,0.005884799857934316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,4096,0.0098880002895991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,3584,0.0092330664396286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,3072,0.008414933085441589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,1024,0.004666666686534882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,768,0.0042463997999827065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,256,0.003398400048414866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,512,0.003881600002447764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,128,0.003160533308982849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,64,0.003009066730737686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,32,0.0029866665601730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1536,65536,0.08074560165405273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,10240,0.015879467129707336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,12288,0.016357333461443583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,7168,0.012588799993197123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,6144,0.01139520009358724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,8192,0.013942399621009826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,5120,0.010931199789047242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,16384,0.020472532510757445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,1536,0.005721599857012431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,4096,0.010390399893124899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,3584,0.008984532952308655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,3072,0.008168533444404602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,2048,0.006150400141874949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,2560,0.0069365332523981735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,1024,0.004791466891765595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,768,0.004252799848715464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,65536,0.054674132664998376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,512,0.003807999938726425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,256,0.0032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,128,0.003014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,64,0.002845866729815801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,1024,32,0.002869333326816559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,12288,0.013288533687591553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,16384,0.0172650674978892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,8192,0.012475732962290447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,10240,0.012177067001660664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,5120,0.010238933563232421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,7168,0.011829333504041036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,6144,0.01088746686776479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,4096,0.009161600470542907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,3584,0.008122666676839193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,3072,0.007437866429487865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,2560,0.006467199822266896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,768,0.003956266740957896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,1024,0.004259199897448222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,1536,0.0050357331832249965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,2048,0.005701333284378052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,512,0.0035743998984495797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,64,0.002810666710138321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,256,0.003201066702604294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,128,0.0030432000756263735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,65536,0.043746133645375565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,768,32,0.0029525332152843474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,10240,0.010041600465774536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,8192,0.009965866804122925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,12288,0.011250133315722149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,16384,0.012473600109418233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,7168,0.009914666414260864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,4096,0.007584000130494435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,6144,0.00901759962240855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,5120,0.008574933807055155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,3072,0.007239466905593872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,3584,0.007533866663773854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,2560,0.006477866570154827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,1536,0.005029333134492239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,2048,0.005898666878541311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,1024,0.004353066782156626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,768,0.0038880000511805216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,64,0.0028575999041398365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,65536,0.03431253433227539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,512,0.0035807999471823373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,256,0.0031680000325044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,128,0.0028575999041398365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,512,32,0.0028736000259717304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,12288,0.009224533041318258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,8192,0.008756267031033833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,10240,0.00879253347714742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,7168,0.008318933347860973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,16384,0.010558933019638062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,6144,0.007765333354473114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,5120,0.007379200061162312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,4096,0.006903466582298279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,3584,0.0073290665944417315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,2560,0.006241066753864289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,3072,0.00689279983441035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,1536,0.0048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,2048,0.0055402666330337524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,768,0.0037802666425704955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,1024,0.004089600096146265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,65536,0.024146133661270143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,512,0.0035018667578697203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,256,0.0031925333042939507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,128,0.0029696000119050344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,64,0.0027989332874615988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,256,32,0.0028714666763941447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,10240,0.00759680022795995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,16384,0.00909440020720164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,12288,0.007945600152015685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,7168,0.006887466708819072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,8192,0.00710399995247523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,6144,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,4096,0.006621866424878438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,5120,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,65536,0.0186954657236735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,3584,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,2560,0.006046933432420095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,2048,0.005371733506520589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,1024,0.004010666658480962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,3072,0.006680533289909363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,1536,0.004726399978001913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,512,0.0033215999603271483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,768,0.0036490666369597114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,256,0.0029877332349618276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,32,0.002799999962250392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,128,0.0028864001234372456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,128,64,0.0026602665583292644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,12288,0.00694400022427241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,16384,0.00746666689713796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,7168,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,8192,0.006886399785677592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,10240,0.007032533486684163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,6144,0.006566399832566579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,5120,0.007006933291753133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,4096,0.006577066580454509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,3072,0.006760533154010773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,3584,0.007011199990908305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,65536,0.011516799529393513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,2560,0.0059562668204307554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,2048,0.005272533496220907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,1024,0.003927466770013174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,1536,0.004600533346335093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,512,0.0033130665620168054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,768,0.0035829332967599234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,256,0.0029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,128,0.0027882667879263563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,64,0.002657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,64,32,0.0027413333455721537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,12288,0.0070613334576288865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,16384,0.006962133447329204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,10240,0.007001600166161854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,8192,0.006856533388296763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,6144,0.006563200056552887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,7168,0.0068010667959849044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,5120,0.006931200126806895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,4096,0.00664213349421819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,3072,0.006594133377075195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,65536,0.01055999994277954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,3584,0.006966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,2560,0.005982933441797892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,512,0.003271466741959254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,768,0.0036576000352700555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,2048,0.005334400137265523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,1536,0.004665599763393402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,1024,0.003945599993069967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,256,0.0029450667401154833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,128,0.0027797333896160126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,32,0.0026506667335828146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,256,32,64,0.002629333237806956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,4096,0.16360960006713868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,5120,0.19834879239400227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,6144,0.23982399304707847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,7168,0.2725920041402181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,3072,0.12521173159281412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,8192,0.30682560602823894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,3584,0.14488320350646972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,2560,0.1092746655146281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,1536,0.07162559827168782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,2048,0.09346026579538981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,1024,0.05083839893341065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,10240,0.3821813265482584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,768,0.041099735101064044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,256,0.019858133792877198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,128,0.014717866977055868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,64,0.011806933085123698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,512,0.030933332443237305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,32,0.01346666713555654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,12288,0.4621535936991374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,7168,0.07060373624165853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,8192,0.08078827063242594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,10240,0.09858880043029786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,12288,0.1175381342569987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,6144,0.06654186646143595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,16384,0.1534239927927653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,4096,0.04256426493326823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,5120,0.05177919864654541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,3584,0.03767786820729573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,2560,0.028706133365631104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,2048,0.02560960054397583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,3072,0.03404586712519328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,768,0.011103999614715577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,65536,16384,0.6205120086669922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,1536,0.019154133399327596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,1024,0.01456000010172526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,256,0.006783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,512,0.008896000186602275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,128,0.005044266581535339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,32,0.004869333406289419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,64,0.004561066627502441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,6144,0.04646613200505574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,7168,0.05459946791330973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,8192,0.06060800155003866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,10240,0.07587412993113199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,12288,0.08920319875081381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,4096,0.0336298664410909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,5120,0.039842132727305093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,3584,0.02945599953333537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,2560,0.02355946699778239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,3072,0.025933865706125898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,16384,0.11780479749043782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,768,0.00916373332341512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,1024,0.010757333040237427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,2048,0.01840533415476481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,512,0.007589333256085714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,128,0.004509866734345754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,1536,0.014344533284505209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,256,0.005218133330345154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,64,0.004066133250792822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,32,0.00451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,7168,0.05386666854222616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,8192,0.056074666976928714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,10240,0.06957866350809733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,12288,0.08214293320973715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,16384,0.10846933523813884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,6144,0.04679146607716878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,5120,0.0371018648147583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,4096,0.030613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,3584,0.027110399802525838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,3072,0.024555732806523643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,1024,0.010195199648539226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,768,0.00953493316968282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,2560,0.02207146684328715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,2048,0.017400532960891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,1536,0.013687466581662497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,256,0.005242666602134705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,512,0.007048533360163371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,128,0.004569600025812784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,64,0.00395413339138031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,32,0.004166399935881296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,8192,0.04434773524602254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,10240,0.05376426776250204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,12288,0.0653877337773641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,16384,65536,0.6281312306722004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,16384,0.08902080059051513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,5120,0.029449599981307983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,6144,0.03369599978129069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,7168,0.03876906633377075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,4096,0.025840000311533613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,3584,0.022730666399002075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,3072,0.019215999046961467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,12288,65536,0.464189879099528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,2560,0.015320533514022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,768,0.00730453332265218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,1024,0.008269866804281871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,1536,0.010321066776911417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,2048,0.012590932846069335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,128,0.003918933371702829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,512,0.00613013356924057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,256,0.0043722664316495265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,64,0.0036373332142829893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,32,0.003946666667858759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,8192,0.0421183983484904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,7168,0.034221867720286056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,10240,0.05189866622289022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,10240,65536,0.4037109375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,12288,0.06261973381042481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,16384,0.08108159701029459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,4096,0.02280319929122925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,2560,0.0149536003669103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,3584,0.02021013299624125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,5120,0.025639466444651288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,3072,0.017145599921544394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,6144,0.029561599095662434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,1536,0.01028053363164266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,1024,0.008016000191370647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,512,0.0057888001203536986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,2048,0.012173866232236225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,768,0.007096533477306366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,256,0.004605866471926371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,128,0.004071466624736786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,8192,65536,0.30979413986206056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,32,0.0040277334551016486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,64,0.0038399999340375268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,8192,0.043372801939646405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,6144,0.026686932643254595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,10240,0.048555731773376465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,7168,0.03246506651242574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,12288,0.05496319929758707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,16384,0.07103040218353271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,3584,0.018706132968266807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,5120,0.023639466365178427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,4096,0.020768000682195028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,3072,0.016353066762288412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,2560,0.013116799791653953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,2048,0.011004799604415893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,1536,0.009380267063776652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,1024,0.007446399827798207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,512,0.00505920002857844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,768,0.0063296000162760425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,256,0.004153600086768469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,32,0.0036138666172822317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,128,0.0036906667053699495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,64,0.003487999985615412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,7168,65536,0.2843477249145508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,7168,0.02996480067571004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,10240,0.04122560024261475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,8192,0.03097493251164754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,12288,0.04413546721140544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,16384,0.05945493380228678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,4096,0.019282132387161255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,6144,0.025589332977930708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,5120,0.0205567995707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,3584,0.016985599199930826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,3072,0.01353600025177002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,2560,0.011614933609962463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,2048,0.011198932925860088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,1024,0.006795733173688252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,1536,0.009270399808883667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,128,0.003602133442958196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,256,0.004019200056791306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,768,0.006182399888833364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,512,0.004540800054868063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,64,0.0032799998919169106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,32,0.0034005333979924522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,6144,65536,0.2325984001159668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,8192,0.03554986715316773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,10240,0.044726399580637614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,7168,0.029237333933512372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,12288,0.05322240193684896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,16384,0.053629867235819494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,5120,0.02035306692123413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,6144,0.02178666591644287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,3584,0.014693333705266317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,3072,0.012725333372751871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,2560,0.01097813347975413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,4096,0.017069866259892784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,2048,0.009332266449928284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,1536,0.007921066880226136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,128,0.00345920001467069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,1024,0.005758933226267497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,512,0.004327466587225596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,768,0.004986666639645894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,256,0.0037973334391911825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,32,0.0034506666163603462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,64,0.0032661333680152893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,5120,65536,0.20592640240987142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,8192,0.03401386737823486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,7168,0.020959999163945517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,6144,0.026924800872802735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,10240,0.029250133037567138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,12288,0.04699840148289998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,16384,0.04647573232650757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,5120,0.02179626623789469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,4096,0.016037333011627197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,3584,0.012371200323104858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,4096,65536,0.16251947085062662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,2560,0.01116373340288798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,1536,0.007971199850241344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,3072,0.011059199770291645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,2048,0.00944640040397644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,1024,0.005729066828886667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,768,0.005016533533732096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,128,0.0033173332611719764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,512,0.004347733159859975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,256,0.0037813333173592886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,32,0.0031850665807724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,64,0.003092266619205475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,10240,0.02699306607246399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,12288,0.03163733283678691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,8192,0.02246399919191996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,7168,0.020060799519220986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,6144,0.0175327996412913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,16384,0.04114773273468018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,4096,0.01288640002409617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,5120,0.0150709331035614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,3072,0.010382933417956035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,3584,0.01144426663716634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,2560,0.01018453339735667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,2048,0.008806399504343669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,1536,0.007396266857783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,1024,0.005520000060399374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3584,65536,0.1484074592590332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,768,0.004922666649023692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,64,0.003205333401759466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,256,0.003731200098991394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,512,0.0042133331298828125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,128,0.0034005333979924522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,32,0.0031669333577156065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,6144,0.01394773324330648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,7168,0.015982932845751443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,12288,0.03026026686032613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,8192,0.020578134059906005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,10240,0.026818132400512694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,5120,0.012106666962305706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,16384,0.03776106834411621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,4096,0.010351999600728353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,3584,0.00941973328590393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,2048,0.0073290665944417315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,3072,65536,0.12458559672037761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,3072,0.009894399841626485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,2560,0.00834986666838328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,1536,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,512,0.003869866579771042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,1024,0.0049098665515581764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,768,0.004407466451327006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,256,0.003470933437347412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,128,0.0031626666585604347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,64,0.0029738667110602063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,32,0.0030720000465710956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,12288,0.029553065697352093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,7168,0.015064533551534018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,6144,0.013301333785057068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,10240,0.02499306599299113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,8192,0.023093332846959434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,16384,0.030032000939051312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,5120,0.011784533659617107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2560,65536,0.10637973149617512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,4096,0.010341333349545796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,3584,0.009388800462086995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,3072,0.00848746697107951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,1536,0.005657599866390228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,2560,0.007701333363850911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,2048,0.006746666630109151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,1024,0.00468800018231074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,128,0.0031541332602500914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,768,0.0042453333735466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,256,0.0033941333492596946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,512,0.00378560001651446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,64,0.0030410667260487873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,32,0.0030016000072161358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,10240,0.01908586621284485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,8192,0.016661333044370015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,16384,0.02736746668815613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,2048,65536,0.08979307015736898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,6144,0.013770666718482972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,7168,0.014897066354751586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,12288,0.021719467639923096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,5120,0.012788266936937968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,3584,0.009223467111587525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,3072,0.008403199911117553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,4096,0.010186666250228881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,1536,0.00543146679798762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,2048,0.005998933315277099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,2560,0.0071712002158164975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,768,0.004161066561937332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,1024,0.0044725333650906885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,512,0.0037471999724706015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,256,0.0034101332227389016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,64,0.002919466545184453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,128,0.003044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,32,0.003017599880695343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,12288,0.016148266196250916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1536,65536,0.07311466534932455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,10240,0.014430933197339377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,16384,0.022267733017603555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,5120,0.010236799716949463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,7168,0.011904000242551168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,8192,0.012827733159065246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,6144,0.011092266440391541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,4096,0.009287466605504353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,2560,0.007062399884064992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,3072,0.007812266548474629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,3584,0.008601599931716919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,2048,0.005710933109124502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,512,0.003568000098069509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,1536,0.005143466591835022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,1024,0.004252799848715464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,768,0.003962666789690653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,65536,0.05362879832585653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,256,0.0032597333192825317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,32,0.0029237332443396253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,128,0.003013333429892858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,1024,64,0.0028607999285062153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,12288,0.01290773351987203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,16384,0.015219199657440185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,7168,0.010245333115259807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,10240,0.011666133006413778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,8192,0.01202453374862671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,6144,0.009588266412417095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,5120,0.008903466661771138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,3072,0.007410133381684621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,2560,0.006392533580462138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,4096,0.00892799993356069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,3584,0.008156799773375193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,2048,0.005650133391221364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,1536,0.005016533533732096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,768,0.003903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,512,0.0035157332817713416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,1024,0.00421013335386912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,64,0.0028309332827727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,65536,0.0421887993812561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,256,0.0031925333042939507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,128,0.003011200080315272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,768,32,0.0029205332199732465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,16384,0.01248426636060079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,12288,0.01074666678905487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,10240,0.009924266735712688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,7168,0.008602666854858398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,8192,0.009126399954160053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,6144,0.008035199840863545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,4096,0.007889066636562348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,5120,0.008942932883898417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,1024,0.004134399940570196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,3072,0.0071285332242647815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,3584,0.00758186678091685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,2560,0.006223999957243601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,1536,0.0049002667268117275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,65536,0.032654933134714764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,2048,0.005555200080076853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,768,0.003824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,512,0.0034517332911491393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,256,0.003124266614516576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,64,0.002784000088771184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,32,0.0028192001084486645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,512,128,0.0028757333755493166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,10240,0.008460799853007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,16384,0.0091839998960495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,12288,0.009530666470527648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,4096,0.007062399884064992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,8192,0.007613866527875264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,7168,0.007268266876538594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,6144,0.00689279983441035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,5120,0.007499733567237854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,65536,0.02349546750386556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,2560,0.006129066646099091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,3072,0.006886399785677592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,3584,0.007223466535409291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,2048,0.005480533341566721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,512,0.003373866776625315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,1536,0.004779733220736186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,1024,0.0041002665956815084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,256,0.003033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,768,0.0037280000746250153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,128,0.0028661333024501802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,64,0.0027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,256,32,0.0027456000447273255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,10240,0.007414400080839793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,16384,0.00773119976123174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,12288,0.0069578667481740315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,8192,0.007221333185831706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,4096,0.006712533533573151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,7168,0.007145600020885467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,5120,0.007110400001207988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,6144,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,65536,0.01472106675306956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,2048,0.005399466554323832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,3584,0.007142400244871776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,3072,0.006684799989064534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,2560,0.0060245335102081295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,1024,0.004010666658480962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,1536,0.004709333181381226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,512,0.0033952000240484873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,768,0.003640533238649368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,256,0.003067733347415924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,128,0.0028480000793933867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,64,0.0026752000053723653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,128,32,0.002717866748571396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,8192,0.007002666592597961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,12288,0.00721919983625412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,16384,0.007582933207352956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,10240,0.007091199855009715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,4096,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,5120,0.00694400022427241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,65536,0.011398399869600933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,6144,0.0065311998128890995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,7168,0.006706133484840393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,3584,0.007011199990908305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,2048,0.00528106689453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,3072,0.006683733562628429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,2560,0.005977599819501241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,1024,0.003920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,256,0.002963199963172277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,1536,0.004605866471926371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,768,0.0035957333942254386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,512,0.003252266595760981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,128,0.002834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,32,0.0026549334327379864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,64,64,0.0027850667635599775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,16384,0.007845333218574524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,7168,0.006807466844717662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,12288,0.006806399921576183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,8192,0.006866133213043213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,10240,0.006960000097751618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,6144,0.006543999910354615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,65536,0.012808533509572348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,3584,0.007115733126799266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,4096,0.00647573322057724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,5120,0.006925866504510244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,3072,0.006629333396752675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,2560,0.005992533266544342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,2048,0.005268266797065735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,512,0.003272533416748047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,768,0.003639466563860575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,1536,0.004640000065167745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,1024,0.003957333415746689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,128,0.0027882667879263563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,64,0.002639999985694885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,32,0.0026848000784715016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,192,32,256,0.003066666672627131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,4096,0.14776959419250488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,5120,0.18197654088338217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,6144,0.21364800135294595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,7168,0.24662399291992188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,8192,0.2814314524332682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,3584,0.12949546972910564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,3072,0.1137610673904419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,2560,0.10118292967478434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,2048,0.08548266887664795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,1536,0.06734506289164224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,768,0.039239466190338135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,10240,0.34962987899780273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,1024,0.04733440081278483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,512,0.028293333450953168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,128,0.011592533191045125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,64,0.009831466277440389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,32,0.00957973301410675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,256,0.01696746746699015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,12288,0.41568533579508465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,6144,0.061544533570607504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,7168,0.06386239926020304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,8192,0.07348372936248779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,10240,0.09018346468607584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,12288,0.10579840342203777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,65536,16384,0.5505098978678385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,4096,0.03985706567764282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,5120,0.048002131779988605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,3584,0.0352021336555481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,16384,0.13792319297790528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,3072,0.0313973327477773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,2560,0.028331732749938963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,1024,0.012622933586438498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,2048,0.02397759954134623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,768,0.011241599917411804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,1536,0.01853546698888143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,256,0.006449066599210103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,512,0.008827733000119527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,64,0.004403199752171834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,128,0.0047765334447224935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,32,0.004683733483155568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,6144,0.045739734172821046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,7168,0.052194134394327796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,8192,0.059279998143514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,10240,0.0835914691289266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,12288,0.09745919704437256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,3584,0.028803199529647827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,5120,0.03922666708628337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,4096,0.0319541335105896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,3072,0.02553173303604126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,16384,0.12102080186208089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,1536,0.013697066903114319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,1024,0.010211199522018433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,2048,0.017324799299240114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,2560,0.021527467171351115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,768,0.008658132950464885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,256,0.0049216002225875854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,64,0.003886933376391729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,128,0.004229333500067393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,512,0.007503999769687653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,32,0.0041685332854588825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,7168,0.04899946848551433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,8192,0.06191680034001669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,10240,0.0739029328028361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,12288,0.08836266994476319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,16384,0.10855039755503337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,3584,0.02691306670506795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,5120,0.036761601765950516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,4096,0.034908799330393474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,6144,0.04259093205134074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,3072,0.024344533681869507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,1536,0.012688000003496805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,1024,0.009790933132171631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,768,0.00842026670773824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,2560,0.01887893279393514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,2048,0.0157258669535319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,512,0.007216000060240428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,128,0.004173866907755534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,256,0.005022933085759481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,64,0.0042581334710121155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,32,0.004481066763401031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,8192,0.038864000638326006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,10240,0.04691733519236247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,12288,0.05528213183085123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,16384,0.07370453675587972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,16384,65536,0.5710208257039388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,6144,0.02988053361574809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,7168,0.033469867706298825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,4096,0.023310933510462442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,3584,0.02065066695213318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,5120,0.025868799289067584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,2048,0.012075733145078022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,3072,0.017053866386413576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,1536,0.009998933474222819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,1024,0.007914666831493378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,2560,0.014801067113876343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,12288,65536,0.42945171991984055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,768,0.0069365332523981735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,512,0.005674666663010915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,256,0.0042698666453361515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,128,0.0037429332733154297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,32,0.0037578667203585305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,64,0.00347626656293869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,7168,0.031065599123636885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,8192,0.03495253324508667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,10240,65536,0.38427734375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,10240,0.04312106768290202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,12288,0.050411732991536465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,16384,0.06605653365453085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,2560,0.01362666686375936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,5120,0.023832533756891885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,6144,0.027412267525990804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,3584,0.017771732807159425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,3072,0.015889066457748412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,4096,0.021460266908009847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,2048,0.011448533336321513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,1536,0.009523199995358785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,1024,0.007652266820271809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,768,0.006793599824110668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,512,0.005299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,256,0.004186666508515676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,128,0.00377813329299291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,64,0.0033674667278925574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,32,0.0034858666360378264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,8192,65536,0.2934837341308594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,10240,0.040906667709350586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,7168,0.02880000074704488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,8192,0.032364799578984575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,12288,0.04635519981384277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,6144,0.027398399511973065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,16384,0.06697493394215902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,5120,0.02250986695289612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,2560,0.012660266955693564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,1536,0.009213866790135701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,4096,0.018523732821146645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,2048,0.01104960044225057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,3072,0.014578133821487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,3584,0.016609066724777223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,768,0.0065301333864529925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,1024,0.007709866762161255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,256,0.004199466605981191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,32,0.0035413332283496858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,512,0.005127466718355815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,128,0.003722666700681051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,64,0.003356799980004629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,7168,65536,0.2739402770996094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,6144,0.025406932830810545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,7168,0.033046400547027587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,8192,0.03480106592178345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,12288,0.04545493523279826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,10240,0.04031039873758952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,16384,0.056161065896352134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,3072,0.015830399592717488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,2560,0.012014933427174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,5120,0.020588799317677816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,4096,0.020883200565973918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,3584,0.015174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,1536,0.008873599767684936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,2048,0.01053653359413147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,1024,0.00739519993464152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,768,0.006186666587988535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,128,0.0038389332592487337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,256,0.004142933338880539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,512,0.004980266590913137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,64,0.0034944000343481696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,32,0.0034965333839257562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,6144,65536,0.22187093098958335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,12288,0.036160000165303546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,8192,0.027922133604685467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,10240,0.03202986717224121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,16384,0.04456640084584554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,7168,0.02531413237253825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,6144,0.02449280023574829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,3072,0.015096533298492431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,3584,0.016951467593510947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,2560,0.01039466659228007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,1536,0.007481599847475688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,5120,0.020777599016825358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,4096,0.019538132349650066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,2048,0.008478933572769165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,1024,0.005705599983533224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,512,0.00452159990866979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,768,0.004771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,256,0.003685333331425985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,128,0.0034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,64,0.0031744000812371576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,5120,65536,0.1919882615407308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,32,0.0032970666885375976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,7168,0.024922666947046916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,8192,0.025803732872009277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,10240,0.02977493405342102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,6144,0.021527467171351115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,12288,0.033598931630452473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,16384,0.04078933397928874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,4096,65536,0.14971200625101727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,4096,0.012033067146937053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,5120,0.014406399925549826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,3584,0.01087679962317149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,3072,0.00981760025024414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,2560,0.008771199981371562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,1536,0.007152000069618225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,2048,0.007730133334795634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,64,0.003067733347415924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,768,0.0045952002207438145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,1024,0.005137066543102265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,256,0.0035349334279696143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,512,0.004060799876848856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,128,0.0032448001205921174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,32,0.003143466760714849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,8192,0.023853866259257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,7168,0.02172693411509196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,6144,0.01976319948832194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,10240,0.02760639985402425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,12288,0.03084160089492798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,16384,0.03896960020065308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,5120,0.016132266322771708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,4096,0.014424533645311991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,3584,0.012411733468373615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,3072,0.010971732934316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,2048,0.007539199789365132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,2560,0.00855466624101003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,1536,0.00659093310435613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,256,0.003554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,1024,0.004911999901135763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,768,0.004525866607824961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,512,0.003990400085846583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,128,0.0033344000577926634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,64,0.0030464000999927522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,32,0.003125333289305369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3584,65536,0.14900479316711426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,10240,0.02344320019086202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,3072,65536,0.11971413294474284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,12288,0.026226133108139038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,8192,0.020122667153676353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,7168,0.018450133005777993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,16384,0.03219093283017476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,4096,0.013597866892814637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,6144,0.01723946730295817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,5120,0.015054933230082192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,2560,0.008231466511885326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,3072,0.010471466183662414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,3584,0.01193386713663737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,2048,0.007434666653474172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,1536,0.006108800073464712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,1024,0.00487360010544459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,768,0.004496000210444133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,512,0.003939199944337209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,32,0.00308693324526151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,256,0.003642666588226954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,128,0.0032255999743938447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,64,0.002993066608905792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,6144,0.01393066644668579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,10240,0.01960106690724691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,7168,0.014973866939544677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,8192,0.016506666938463845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,12288,0.022051199277242025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2560,65536,0.1011957327524821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,16384,0.027022933959960936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,4096,0.012016000350316365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,5120,0.013138133287429809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,3584,0.010133333007494609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,2560,0.007629866898059845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,1024,0.00446720023949941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,3072,0.009221333265304565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,2048,0.006930133203665416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,1536,0.005453866720199585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,128,0.0030879999200503034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,768,0.004095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,256,0.0033471999069054925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,512,0.003704533229271571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,32,0.003124266614516576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,64,0.002922666569550832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,10240,0.01573013365268707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,2048,65536,0.08213546276092529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,7168,0.013246933619181315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,6144,0.014934399724006652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,8192,0.014074666301409402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,12288,0.018243199586868285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,16384,0.02283733288447062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,1536,0.0053951998551686605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,4096,0.011165866255760193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,5120,0.012925866246223449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,3584,0.010098133484522502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,3072,0.009238400061925252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,2048,0.006135466694831848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,2560,0.007416533430417378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,1024,0.0044159998496373495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,768,0.004068266600370407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,512,0.0036661334335803984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,64,0.002951466788848241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,256,0.003303466737270355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,128,0.003058133274316788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,32,0.0029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1536,65536,0.06775999863942464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,16384,0.016394666830698647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,12288,0.014427733421325684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,8192,0.014121599992116294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,10240,0.01442346672217051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,7168,0.012806399663289388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,6144,0.012103466192881267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,4096,0.00920746624469757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,3584,0.00853653351465861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,5120,0.010726400216420491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,3072,0.007763200004895528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,2048,0.005982933441797892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,2560,0.0068906664848327635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,1536,0.005329066514968872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,1024,0.004518400132656098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,65536,0.047278932730356854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,768,0.004025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,512,0.003626666714747747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,256,0.003271466741959254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,128,0.003014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,64,0.002842666705449422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,1024,32,0.002868266652027766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,16384,0.01416000028451284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,12288,0.016748799880345663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,6144,0.011201066772143047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,4096,0.008413867155710856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,8192,0.013065600395202636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,7168,0.011997866630554199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,10240,0.015320533514022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,5120,0.00969599982102712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,2560,0.006410666803518932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,3584,0.008088533580303193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,3072,0.00735040009021759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,65536,0.03918293317159017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,2048,0.005717333157857259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,1536,0.0051466668645540874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,1024,0.004229333500067393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,768,0.0038922667503356934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,512,0.0035360001027584078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,256,0.0031317333380381264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,64,0.0028618666032950084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,128,0.0029738667110602063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,10240,0.010668800274531046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,768,32,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,12288,0.011820800105730693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,16384,0.013498666882514953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,8192,0.009799466530481974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,5120,0.00775786687930425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,7168,0.009076266487439474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,4096,0.007976533472537994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,6144,0.008350933591524761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,2560,0.006218666831652323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,2048,0.005544533332188925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,3584,0.00763626645008723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,3072,0.007189333438873291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,1536,0.004878933231035868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,65536,0.029764266808827718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,1024,0.004141866664091746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,768,0.0037930667400360107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,512,0.0034933333595593774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,256,0.0032085334261258446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,32,0.002850133428970973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,64,0.002771199991305669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,512,128,0.002883200099070867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,7168,0.007242666681607564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,8192,0.007456000149250031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,10240,0.00804799993832906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,16384,0.010542933146158855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,12288,0.008854400118192036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,6144,0.006955733398596446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,5120,0.007523199915885926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,4096,0.007106133302052816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,3584,0.007299200197060903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,3072,0.006878933310508728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,65536,0.027837866544723512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,2048,0.005465599894523621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,2560,0.006122666597366333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,768,0.003717333326737086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,1536,0.004758400221665701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,1024,0.0040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,512,0.003337600082159042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,256,0.0029845332105954488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,128,0.002834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,64,0.002749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,256,32,0.002780800064404805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,10240,0.007562666634718577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,12288,0.007587199906508128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,16384,0.008675199747085572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,8192,0.007333333293596904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,7168,0.007210666437943776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,4096,0.006659199794133504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,65536,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,5120,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,6144,0.006614399949709575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,2560,0.005995733539263407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,3584,0.007102933526039123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,1536,0.004722133278846741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,2048,0.005366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,3072,0.006697600086530049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,1024,0.004031999905904134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,768,0.0036576000352700555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,512,0.0033183999359607695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,64,0.0026869334280490874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,256,0.0029397333661715193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,128,0.002780800064404805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,128,32,0.0027317332724730173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,16384,0.007349333167076111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,8192,0.006913066903750102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,10240,0.007142400244871776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,7168,0.006786133348941803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,12288,0.007212799787521362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,65536,0.01421119968096415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,5120,0.0069909334182739254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,6144,0.006600533425807953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,3584,0.007045333087444305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,4096,0.006626133124033611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,2560,0.005959466596444448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,3072,0.006651733318964641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,512,0.003253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,2048,0.005291733145713806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,768,0.003568000098069509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,1536,0.004612266520659129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,256,0.003018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,1024,0.003957333415746689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,128,0.0027722666660944624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,32,0.0027200000981489818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,64,64,0.0026517334083716077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,10240,0.0070826664566993715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,16384,0.008214400211970011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,8192,0.0070250665148099255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,12288,0.007311999797821045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,6144,0.006490666667620342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,7168,0.006774400174617767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,4096,0.006571733454863231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,65536,0.012285866340001424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,5120,0.006949333349863689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,3584,0.007055999835332234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,2048,0.005302399893601735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,3072,0.006701866785685222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,2560,0.005987200140953064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,1536,0.004631466666857402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,1024,0.003979733337958654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,128,0.0027978666126728057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,512,0.0032543999453385672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,768,0.0035786665976047516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,256,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,64,0.0026346666117509207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,160,32,32,0.002678400029738744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,4096,0.142520538965861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,5120,0.17500905990600585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,6144,0.20740373929341635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,7168,0.2389322598775228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,8192,0.2742506663004557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,3584,0.125437863667806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,3072,0.1088917334874471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,10240,0.33958400090535484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,2560,0.09809813499450684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,2048,0.08309866587320963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,512,0.026576000452041625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,768,0.036311467488606766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,1024,0.04556266864140828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,64,0.00857919951279958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,1536,0.06455146471659343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,256,0.01588266690572103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,128,0.010085333387056987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,32,0.008266666531562805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,12288,0.40676692326863606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,7168,0.06108160018920898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,8192,0.06989440123240152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,6144,0.059572267532348636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,10240,0.0862826665242513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,12288,0.10112319787343342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,16384,0.13338133494059246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,4096,0.03787519931793213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,3584,0.0339349349339803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,5120,0.04556266864140828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,3072,0.031591467062632245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,2560,0.027876265843709308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,1024,0.012355200449625651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,768,0.01002239982287089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,2048,0.0232149342695872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,1536,0.017480534315109254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,512,0.00785813331604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,256,0.0057322666049003605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,128,0.004556799928347269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,64,0.004123733441034953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,32,0.004558933277924856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,65536,16384,0.5393621444702148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,7168,0.04698346853256226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,6144,0.04566826820373535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,8192,0.05324800014495849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,10240,0.06635306676228842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,12288,0.07726293404897054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,5120,0.03545279900232951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,4096,0.028887466589609785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,3072,0.0230240007241567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,3584,0.026301866769790648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,16384,0.1017141342163086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,1024,0.009688533345858256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,768,0.008337066570917765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,2048,0.016790399948755898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,512,0.00717439999183019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,2560,0.02144320011138916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,1536,0.01318186620871226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,256,0.00452159990866979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,64,0.003638399889071783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,128,0.003994666785001755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,32,0.0039711999396483105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,7168,0.04058239857355754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,8192,0.04689600070317586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,10240,0.05645653406778971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,12288,0.06803306738535563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,16384,0.08748586972554526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,4096,0.025745066006978352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,3584,0.023172267278035483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,5120,0.030620799462000532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,6144,0.035499731699625656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,1536,0.011213866869608562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,2560,0.017114667097727458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,2048,0.01415786643822988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,3072,0.020127999782562255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,1024,0.008586666981379191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,256,0.004216533402601878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,768,0.007382399837176005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,512,0.0061749334136645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,128,0.0036650667587916053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,64,0.003502933432658514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,32,0.003688533355792364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,8192,0.037859201431274414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,10240,0.0457535982131958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,12288,0.05304533243179321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,16384,0.06992426713307699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,16384,65536,0.5527168273925781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,7168,0.032767999172210696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,4096,0.022406399250030518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,5120,0.024859732389450072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,6144,0.028808534145355225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,3584,0.0200053334236145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,3072,0.016609066724777223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,1536,0.009656533598899841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,2560,0.014349866906801859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,12288,65536,0.41599146525065106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,2048,0.01164906620979309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,1024,0.007604266703128815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,768,0.006586666901906331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,512,0.005123200019200643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,256,0.003993600110212962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,128,0.003622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,64,0.003337600082159042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,32,0.0036117332677046456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,12288,0.05488746563593546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,8192,0.03393919865290324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,7168,0.0305461327234904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,10240,0.04173546632130941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,10240,65536,0.3570197423299154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,16384,0.07057493527730306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,6144,0.026809600989023845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,4096,0.019401599963506065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,3072,0.016635732849438985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,3584,0.017382399241129557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,5120,0.023310933510462442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,2048,0.01050986647605896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,1536,0.008678399523099263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,2560,0.01272640029589335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,1024,0.00691840002934138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,512,0.00456639975309372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,256,0.0037461332976818085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,768,0.006109866499900818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,128,0.003349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,32,0.0033141332368055976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,64,0.0032405334214369455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,8192,65536,0.28372694651285807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,12288,0.04227946599324544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,7168,0.025623466571172076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,16384,0.0603274663289388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,8192,0.029553065697352093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,10240,0.03554133176803589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,6144,0.024487467606862386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,3584,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,2048,0.009750399986902874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,2560,0.01108906666437785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,4096,0.016812799374262492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,3072,0.012773332993189493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,1536,0.008005333443482716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,5120,0.0198527991771698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,1024,0.0065194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,128,0.003272533416748047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,768,0.00562666654586792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,512,0.0041002665956815084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,256,0.0036117332677046456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,64,0.0031114667654037476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,32,0.0032927999893824257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,7168,65536,0.26134613355000813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,7168,0.023167999585469563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,8192,0.025623466571172076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,10240,0.03713599840799968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,12288,0.042233598232269284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,6144,0.022098133961359658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,16384,0.053819731871287024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,2560,0.009902933239936828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,3072,0.01164906620979309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,5120,0.017948800325393678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,2048,0.008488532900810242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,4096,0.01493333379427592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,3584,0.01322773297627767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,1536,0.007389866809050243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,1024,0.005981866518656413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,64,0.0031648000081380212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,512,0.0039818666875362395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,768,0.004645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,256,0.003522133330504099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,128,0.0032106667757034303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,32,0.003139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,6144,65536,0.21663146018981932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,8192,0.0253440002600352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,10240,0.02974933385848999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,12288,0.033752532800038655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,6144,0.019160532951354982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,7168,0.02109760046005249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,16384,0.04223680098851522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,3584,0.01197653313477834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,3072,0.010475732882817586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,2048,0.008126933375994365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,5120,0.016210132837295534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,4096,0.014334932963053385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,2560,0.009413333733876546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,5120,65536,0.18775893847147623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,1024,0.005231999854246775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,1536,0.00691840002934138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,768,0.004574933151404063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,64,0.0030720000465710956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,512,0.004014933357636133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,256,0.003542399903138479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,128,0.0032448001205921174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,32,0.00327360009153684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,12288,0.03271786570549011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,7168,0.01768746574719747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,8192,0.019806933403015137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,6144,0.015664000312487283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,10240,0.028283733129501342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,16384,0.04010879993438721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,5120,0.013341866930325828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,3072,0.009411199887593587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,4096,0.011379200220108032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,3584,0.010149332880973815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,2560,0.008671999971071879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,4096,65536,0.14849600791931153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,1536,0.006682666639486949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,2048,0.007650133470694225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,1024,0.005161599814891815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,768,0.004714666803677877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,512,0.004188799858093261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,64,0.0030826665461063385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,256,0.0035605333745479585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,128,0.0033514666060606635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,32,0.0031456001102924346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,7168,0.016755199432373045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,10240,0.024445867538452147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,6144,0.014475733041763306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,8192,0.021580799420674642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,12288,0.027385600407918292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,16384,0.034721068541208905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,4096,0.01087679962317149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,5120,0.012657066186269125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,3584,0.00977280040582021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,3072,0.008965333302815754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,1536,0.006346666812896728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,1024,0.004661333560943603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3584,65536,0.13030933539072673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,2560,0.008061866462230682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,2048,0.007242666681607564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,768,0.00425600012143453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,256,0.003421866645415624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,512,0.0038421332836151125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,128,0.003138133386770884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,64,0.0030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,32,0.003054933249950409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,6144,0.013531733552614847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,8192,0.01914880077044169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,12288,0.02531306743621826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,10240,0.02235306700070699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,7168,0.0175327996412913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,16384,0.030789333581924438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,4096,0.010312533378601075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,3072,65536,0.11776426633199055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,3584,0.009530666470527648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,5120,0.011929600437482198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,2048,0.006853333115577698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,3072,0.008685866991678875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,2560,0.007736533383528392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,768,0.004369066655635833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,1536,0.005814399818579356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,1024,0.004880000154177348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,512,0.003868799904982249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,32,0.0030154667794704436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,256,0.0033600000043710076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,64,0.0029386666913827257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,128,0.0030773334205150605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,10240,0.0186954657236735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,7168,0.014969600240389505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,6144,0.013433600465456644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,8192,0.0169706662495931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,12288,0.020651733875274657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,16384,0.025088000297546386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2560,65536,0.0984394629796346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,5120,0.012525866429011026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,1024,0.004569600025812784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,4096,0.010760533809661865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,3072,0.008183466891447704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,3584,0.009240532914797466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,2560,0.007474133372306823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,2048,0.006604800124963124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,1536,0.005532800157864889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,768,0.004136533290147781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,256,0.0032981333633263906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,128,0.0029706666866938275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,32,0.0028885332246621448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,512,0.0037077332536379496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,64,0.0028714666763941447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,16384,0.020708266894022623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,2048,65536,0.0810378630956014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,8192,0.01334826648235321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,6144,0.011576533317565918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,10240,0.014223999778429666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,7168,0.012566399574279786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,12288,0.01614293356736501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,5120,0.010680533448855082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,4096,0.009416533509890239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,3584,0.008749866485595703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,2560,0.0069472000002861025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,2048,0.005713066458702088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,1536,0.005203199883302053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,3072,0.007961600025494894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,1024,0.004276266694068909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,768,0.003977599988381068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,512,0.003575466573238373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,64,0.002890666574239731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,256,0.0032127998769283296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,128,0.00296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,32,0.002948266764481862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1536,65536,0.060595198472340905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,16384,0.016270933548609416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,12288,0.013171199957529703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,10240,0.011601066589355469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,8192,0.010839466253916423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,7168,0.010316800077756245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,6144,0.009707732995351156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,4096,0.008188800017038981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,5120,0.00909866690635681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,3584,0.008302933474381765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,1024,0.0042026668787002565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,3072,0.007301333546638489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,2048,0.005703466633955637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,2560,0.006376533210277558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,1536,0.005077333251635233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,65536,0.04301120042800903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,256,0.0031818665564060213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,768,0.0038912000755469004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,512,0.003489066660404205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,128,0.002977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,64,0.002807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,1024,32,0.002937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,12288,0.011267200112342834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,16384,0.013190399607022604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,8192,0.00941439966360728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,10240,0.010275200009346008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,7168,0.008988799651463826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,6144,0.008552533388137818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,4096,0.008090666433175405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,3584,0.007804800073305767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,5120,0.009427199761072796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,3072,0.0071274667978286745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,2560,0.0062282666563987735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,1024,0.004123733441034953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,768,0.0038250667353471124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,2048,0.005539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,1536,0.004915200173854828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,65536,0.037189332644144694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,512,0.003446399917205175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,256,0.0030773334205150605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,64,0.0028021333118279776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,32,0.0028351999819278715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,768,128,0.002916266769170761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,16384,0.011050666371981304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,6144,0.008120533327261608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,12288,0.009923199812571209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,7168,0.008763733506202697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,8192,0.009217066566149394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,10240,0.009354666868845622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,5120,0.00739519993464152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,3584,0.007357866565386455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,2560,0.0062826668222745255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,4096,0.006921599805355072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,2048,0.005579733351866404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,3072,0.006898133456707001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,1536,0.004747733473777771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,65536,0.027883734305699664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,1024,0.004073599974314371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,768,0.0037791999677817024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,512,0.003469866762558619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,256,0.0031818665564060213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,128,0.0028949332733949023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,64,0.002842666705449422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,512,32,0.002887466549873352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,12288,0.008309333523114523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,10240,0.007755733529726664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,16384,0.00981119970480601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,8192,0.007096533477306366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,7168,0.006958933174610138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,4096,0.00661653329928716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,6144,0.006669866542021434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,3584,0.007162666817506154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,5120,0.0070720002055168155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,65536,0.018629332383473717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,2560,0.0060810665289560955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,2048,0.005383466680844625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,3072,0.006750933329264323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,768,0.0036746665835380556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,1536,0.004725333551565806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,256,0.003017599880695343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,1024,0.004007466634114583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,512,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,128,0.0028661333024501802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,64,0.0026677332818508146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,16384,0.0074432000517845156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,256,32,0.002690133452415466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,12288,0.006852266689141591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,8192,0.00697386662165324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,10240,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,5120,0.006994133194287618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,6144,0.006566399832566579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,65536,0.013312000036239623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,7168,0.006854400038719177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,4096,0.0065087998906771345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,3584,0.007073066631952922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,2048,0.00535999983549118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,1024,0.004008533308903376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,3072,0.006681600213050842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,1536,0.004686933259169261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,2560,0.0060479998588562015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,768,0.003685333331425985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,512,0.003293866664171219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,128,0.0028351999819278715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,256,0.0030122667551040648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,64,0.0026943999032179515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,128,32,0.0026911998788515727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,16384,0.0068800002336502075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,12288,0.006783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,6144,0.006488533318042755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,8192,0.006773333251476288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,7168,0.00673280010620753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,65536,0.010790399710337321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,10240,0.006856533388296763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,5120,0.006851199766000111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,3072,0.0066549330949783325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,4096,0.006403199831644694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,2560,0.005932799975077311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,3584,0.007006933291753133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,2048,0.005262933174769084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,1536,0.004599466423193614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,1024,0.003913599997758865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,512,0.0033461332321166994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,768,0.003575466573238373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,128,0.002775466690460841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,256,0.0029002666473388673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,64,0.0026496000587940215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,64,32,0.0026890667776266735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,12288,0.006810666620731353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,8192,0.006677333513895671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,16384,0.00681279997030894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,10240,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,65536,0.009543466567993163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,7168,0.006683733562628429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,5120,0.006788266698519389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,6144,0.006467199822266896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,3584,0.006905599931875865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,4096,0.006580266853173573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,2560,0.005940266450246175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,3072,0.006593066453933716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,2048,0.005303466816743215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,512,0.0032117334504922234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,1536,0.004632533093293508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,1024,0.003923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,256,0.003078400095303853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,768,0.0035914666950702667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,32,0.0026752000053723653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,64,0.002624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,128,32,128,0.00271573339899381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,4096,0.13719679514567057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,5120,0.1709536075592041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,6144,0.20218666394551596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,7168,0.23369280497233072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,8192,0.2670506795247396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,3584,0.12090773582458496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,3072,0.10569067001342773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,2560,0.09181973139444986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,1536,0.06210773388544718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,2048,0.07996799945831298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,1024,0.04284799893697103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,256,0.013702399532000222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,10240,0.3322186787923177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,128,0.009111467003822326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,512,0.025230934222539265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,32,0.006828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,768,0.03415466547012329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,64,0.007344000041484833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,12288,0.3971562703450521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,7168,0.059129599730173746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,8192,0.06757226785024008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,10240,0.08447786966959635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,6144,0.057486931482950844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,12288,0.0980234702428182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,65536,16384,0.5273717244466145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,3584,0.032706133524576825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,4096,0.03650240103403728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,16384,0.12856746514638265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,5120,0.04422613382339478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,2560,0.026922667026519777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,3072,0.028394667307535808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,2048,0.022449066241582237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,1536,0.016687999169031777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,512,0.007548800110816956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,768,0.010500267148017883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,1024,0.01192639966805776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,256,0.005220266679922739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,128,0.0042453333735466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,32,0.00413226659099261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,64,0.0038389332592487337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,6144,0.04053653478622436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,7168,0.0460970679918925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,8192,0.05314346551895142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,10240,0.06410346825917562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,12288,0.08654826482137044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,5120,0.03511039813359578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,4096,0.029021867116292316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,16384,0.11075413227081299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,3584,0.02571733395258586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,3072,0.02284053365389506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,1536,0.013009066383043924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,2048,0.016408532857894897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,2560,0.01954560081164042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,1024,0.01051200032234192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,512,0.006778666873772939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,768,0.008136533200740814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,256,0.004392533500989278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,128,0.003841066608826319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,64,0.003500800083080927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,32,0.0035978667438030243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,10240,0.0685856024424235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,7168,0.039895466963450116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,8192,0.04471786816914876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,12288,0.07913706302642823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,16384,0.09952320257822672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,3584,0.02265920042991638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,6144,0.0351093331972758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,5120,0.030241066217422487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,4096,0.025010132789611818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,2048,0.01432960033416748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,1536,0.011783466736475626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,1024,0.00893440047899882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,2560,0.018102399508158364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,3072,0.021572266022364298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,128,0.0038101332883040107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,768,0.007751466830571492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,512,0.006622933348019918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,256,0.0043712000052134195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,64,0.003479466587305069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,32,0.0035648000737031303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,8192,0.038423466682434085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,10240,0.0446666677792867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,12288,0.058390398820241295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,16384,65536,0.5977792104085287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,16384,0.07449173132578532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,7168,0.032798933982849124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,5120,0.02458239992459615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,6144,0.028445865710576373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,4096,0.022657066583633423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,3072,0.016515200336774193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,3584,0.018504534165064493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,1024,0.007459199925263722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,12288,65536,0.4151722590128581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,1536,0.00942080020904541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,768,0.00665280024210612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,512,0.005076266825199127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,2560,0.013978667060534158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,2048,0.011473066608111064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,256,0.003853866706291834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,128,0.003491200009981791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,32,0.0035146666069825493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,64,0.0032373333970705668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,8192,0.033343998591105144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,10240,65536,0.36679251988728845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,7168,0.028534400463104247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,10240,0.039607465267181396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,12288,0.04606506824493408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,16384,0.0681279977162679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,6144,0.025703465938568114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,3584,0.01639359990755717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,5120,0.02370880047480265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,4096,0.018797866503397622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,2048,0.010796800255775452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,3072,0.014510933558146158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,2560,0.0124917338291804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,512,0.0044159998496373495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,1536,0.008728532989819845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,768,0.006171733140945435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,1024,0.007357866565386455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,8192,65536,0.27685546875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,64,0.0032255999743938447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,256,0.003739733248949051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,128,0.0034346667428811393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,32,0.0033312000334262846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,6144,0.02250666618347168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,10240,0.03520853519439697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,8192,0.029318400224049884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,7168,0.025539199511210125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,12288,0.04511359930038452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,16384,0.06059413353602091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,2560,0.011296000083287556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,5120,0.019700266917546592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,3584,0.014596266547838846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,4096,0.01657919983069102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,3072,0.012899200121561686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,1536,0.00812906672557195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,2048,0.009554133812586466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,128,0.003385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,1024,0.006446933249632518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,512,0.004271999994913737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,768,0.0055871998270352686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,256,0.0036831999818483984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,64,0.0031295999884605407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,32,0.0031615999837716425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,7168,0.028300799926122028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,7168,65536,0.25811413129170735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,8192,0.031191466252009074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,10240,0.03663680156071981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,12288,0.04183893203735352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,16384,0.052324267228444424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,6144,0.02018666664759318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,3072,0.01151039997736613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,2560,0.010305066903432209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,5120,0.01889386574427287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,4096,0.014862933754920959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,2048,0.00881173312664032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,3584,0.013124266266822815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,256,0.0036320000886917113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,1024,0.006229333579540253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,1536,0.007607466479142506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,768,0.004929066697756449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,512,0.00409706657131513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,128,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,64,0.0031338666876157125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,32,0.0032330666979153953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,6144,65536,0.20621652603149415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,7168,0.023088000218073525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,8192,0.025259733200073242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,10240,0.029309866825739543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,12288,0.034289065996805826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,6144,0.02222613294919332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,16384,0.042019200325012204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,4096,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,3584,0.013134933511416116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,3072,0.011779200037320454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,2560,0.008770133058230083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,5120,0.01844586730003357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,1536,0.006542933483918508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,2048,0.007589333256085714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,1024,0.0050005331635475155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,128,0.003253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,512,0.003835733234882355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,768,0.004170666635036469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,256,0.0034901333351929987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,5120,65536,0.18156372706095378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,32,0.0033290666838486993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,64,0.0030432000756263735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,10240,0.026925865809122724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,7168,0.02066346605618795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,8192,0.024525866905848185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,12288,0.029710932572682695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,6144,0.018693333864212035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,16384,0.039000535011291505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,5120,0.016764799753824867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,4096,0.012545067071914672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,3584,0.011394133170445759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,3072,0.010309333602587383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,2560,0.008260266482830047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,2048,0.007355733215808869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,1536,0.00631573349237442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,4096,65536,0.14686293601989747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,1024,0.0048991998036702475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,64,0.0030389333764712016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,768,0.004259199897448222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,512,0.0038090666135152185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,256,0.0033802665770053864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,128,0.003125333289305369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,32,0.0030464000999927522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,7168,0.018734933932622273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,8192,0.020936532815297445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,12288,0.02674986720085144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,6144,0.016838399569193523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,16384,0.03587626616160075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,10240,0.02379306753476461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3584,65536,0.13383785883585614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,2560,0.007877333462238312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,3584,0.009379200140635173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,5120,0.014984533190727234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,3072,0.00869653324286143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,4096,0.01055573324362437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,2048,0.007047466437021891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,1024,0.004530133306980133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,1536,0.006278400123119354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,768,0.004130133241415024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,32,0.0031061333914597826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,256,0.003387733300526937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,512,0.0037941334148248037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,64,0.002980266759792964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,128,0.003044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,6144,0.01421119968096415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,12288,0.02319999933242798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,7168,0.015837867061297098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,8192,0.017570134003957114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,10240,0.020483199755350748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,16384,0.028643200794855755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,4096,0.009715200463930766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,5120,0.0131221334139506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,3072,65536,0.11009600162506103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,3072,0.009325866897900898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,3584,0.008950400352478027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,2048,0.006910933554172516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,1536,0.005749333401521047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,2560,0.007653333246707916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,768,0.004103466620047887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,1024,0.0047189335028330484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,64,0.0029834667841593427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,128,0.003045333425203959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,512,0.0037461332976818085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,256,0.0033333333830038703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,32,0.0029663999875386557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2560,65536,0.09313920338948568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,12288,0.01997119983037313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,6144,0.012351999680201214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,7168,0.013592533270517983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,10240,0.01768640081087748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,8192,0.014919466773668923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,16384,0.024628265698750814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,4096,0.009488000472386678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,5120,0.011372799674669903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,3584,0.008708266417185466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,3072,0.00804906686147054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,2560,0.007284266750017802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,2048,0.006592000027497609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,1024,0.004274133344491323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,1536,0.005333333214124044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,768,0.003955200066169103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,512,0.0035775999228159585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,128,0.0030517332255840302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,256,0.0032320000231266023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,32,0.0029674666623274487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,64,0.0028949332733949023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,7168,0.011415466666221619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,10240,0.01418239971001943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,2048,65536,0.0750986655553182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,8192,0.01199679970741272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,16384,0.020472532510757445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,6144,0.010573866963386535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,5120,0.01071573297182719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,12288,0.015893333156903586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,4096,0.009404800335566203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,3584,0.008737066388130188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,1536,0.005036800106366476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,3072,0.007885866860548655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,2560,0.006958933174610138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,2048,0.005714133381843567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,1024,0.0042250668009122215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,256,0.0034495999415715536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,768,0.00391146664818128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,512,0.003533866753180822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,64,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,128,0.002977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,32,0.0029877332349618276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1536,65536,0.06222826639811198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,8192,0.012531200051307678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,12288,0.012455466389656066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,10240,0.011518933375676473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,16384,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,7168,0.011090133587519329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,4096,0.00876586635907491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,6144,0.010279466708501179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,5120,0.00955839951833089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,3584,0.008226133386294047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,3072,0.007420800129572551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,1536,0.004889599978923798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,2560,0.006533333162466686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,2048,0.005553066730499268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,1024,0.004216533402601878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,768,0.003851733356714249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,128,0.0029450667401154833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,512,0.0034474665919939675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,256,0.0031093334158261614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,65536,0.042490665117899576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,64,0.0028384000062942503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,1024,32,0.0028704000016053517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,12288,0.011628799637158712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,8192,0.009816533327102661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,10240,0.010596266388893128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,16384,0.012312533458073933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,6144,0.009143466750780743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,7168,0.009273599584897358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,4096,0.007085866729418437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,5120,0.008357333143552144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,2048,0.005534933507442474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,3584,0.00767573316891988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,3072,0.007246933380762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,2560,0.006244266529877981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,1536,0.0048981333772341405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,1024,0.004167466859022776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,65536,0.035683198769887285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,512,0.003419733295838038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,768,0.0038058665891488397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,256,0.003115733216206233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,128,0.0029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,64,0.0027082666754722597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,768,32,0.0028362666567166646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,12288,0.010019200046857198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,16384,0.01195093293984731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,8192,0.008568533261617025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,7168,0.007768533130486806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,4096,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,10240,0.009366400043169658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,6144,0.007228800157705943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,5120,0.007546666761239369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,2048,0.005478399991989136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,3584,0.007393066585063934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,65536,0.025991467634836833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,3072,0.006870399912198384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,2560,0.006134399771690368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,1536,0.004783999919891357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,1024,0.0040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,512,0.0033717334270477297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,768,0.0037205333511034647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,256,0.0030602666238943735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,64,0.002737066646416982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,128,0.0028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,512,32,0.002749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,12288,0.007282133400440216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,16384,0.008780800302823384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,6144,0.006696533163388569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,10240,0.007430399954319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,8192,0.007272533575693766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,7168,0.007207466661930085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,5120,0.007146666447321574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,4096,0.006729599833488464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,65536,0.019662932554880778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,3072,0.006806399921576183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,768,0.003668266783157984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,3584,0.007298133273919423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,1536,0.004701866706212362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,1024,0.003982933362325033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,2048,0.005397333204746247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,2560,0.0060576001803080235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,512,0.003311999887228012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,128,0.0028575999041398365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,256,0.003089066594839096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,32,0.002717866748571396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,256,64,0.002665599932273229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,12288,0.0070271998643875126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,16384,0.007260799904664357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,8192,0.006872533261775971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,7168,0.006741333504517872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,65536,0.011587199568748475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,10240,0.0070698668559392285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,6144,0.0065184002121289565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,5120,0.007004799942175548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,3072,0.006730666756629944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,2560,0.005996799965699514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,4096,0.006515199939409892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,3584,0.007012266914049785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,1024,0.003950933367013932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,2048,0.005347200234731038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,1536,0.004667733112970988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,512,0.003385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,768,0.003619199991226196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,256,0.002959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,32,0.0026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,128,0.00278613343834877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,128,64,0.0026922665536403658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,8192,0.006626133124033611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,12288,0.006683733562628429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,16384,0.006818133095900218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,65536,0.011226666967074077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,10240,0.006733866532643636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,7168,0.006563200056552887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,6144,0.006482133269309997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,5120,0.006908800204594929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,4096,0.006439466774463653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,3584,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,3072,0.006503466765085857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,2560,0.005930666625499725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,1536,0.00461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,512,0.003337600082159042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,2048,0.005267199873924255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,256,0.0029834667841593427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,1024,0.003902933249870936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,768,0.003607466568549474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,64,0.002632533262173335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,128,0.0027232001225153605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,12288,0.006807466844717662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,64,32,0.0026848000784715016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,8192,0.0066655998428662615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,10240,0.006967466572920482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,16384,0.006904533505439759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,65536,0.011452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,7168,0.006579199930032094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,6144,0.006442666550477346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,5120,0.006913066903750102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,4096,0.006387199958165486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,3584,0.006930133203665416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,2048,0.005272533496220907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,768,0.003667200108369192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,3072,0.0065536002318064375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,2560,0.005962666869163513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,1536,0.004588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,1024,0.0039018665750821433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,64,0.0027232001225153605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,128,0.0027488000690937043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,512,0.0032490665713946023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,256,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,96,32,32,0.00262719988822937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,4096,0.14837973912556965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,5120,0.18504533767700196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,6144,0.21793492635091147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,7168,0.2544095993041992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,8192,0.2862645467122396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,3584,0.13052266438802082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,2048,0.08687573273976644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,3072,0.11488853295644123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,2560,0.0979861338933309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,1536,0.06721386909484864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,768,0.033514666557312014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,512,0.026871466636657716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,1024,0.046196266015370684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,10240,0.3579200108846029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,64,0.007758933305740357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,128,0.009485866626103718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,256,0.0153546671072642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,32,0.0068896000583966565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,12288,0.4286069234212239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,7168,0.05885119835535685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,8192,0.06685226758321126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,10240,0.08385919729868571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,12288,0.09825066725413004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,6144,0.05711573362350464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,3584,0.03241173426310222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,5120,0.04384320179621379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,3072,0.0310591995716095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,4096,0.036185598373413085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,16384,0.14334506988525392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,2560,0.024272000789642333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,1024,0.011635200182596842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,768,0.009500799576441447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,1536,0.016694400707880655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,128,0.004014933357636133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,2048,0.020518400271733604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,256,0.005083733300367991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,512,0.008116266628106435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,64,0.003736533224582672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,65536,16384,0.5665440241495768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,32,0.00403413325548172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,6144,0.04195839961369832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,7168,0.05216426849365234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,8192,0.05453759829203288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,10240,0.06381760040918985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,12288,0.08544853528340658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,16384,0.11128959655761719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,3072,0.02222933371861776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,5120,0.03561066786448161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,3584,0.026133332649866742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,4096,0.029421865940093994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,2560,0.01990399956703186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,2048,0.018195199966430663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,256,0.0043488000830014546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,768,0.00844586690266927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,1024,0.010107733805974324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,512,0.006748799979686737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,1536,0.014081066846847535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,128,0.00386559988061587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,64,0.0034442665676275887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,32,0.0035616000493367515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,10240,0.06675413449605307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,8192,0.044462935129801436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,7168,0.03920106490453084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,12288,0.07683413028717041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,16384,0.09753493467966715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,6144,0.035554134845733644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,4096,0.02453119953473409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,5120,0.029605333010355634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,3584,0.022843732436498006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,2560,0.017348267634709678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,1024,0.009102933605511983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,1536,0.011970133582750956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,3072,0.01938026746114095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,2048,0.014669866363207499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,768,0.0076106667518615724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,512,0.006649599969387054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,256,0.004373333354791006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,128,0.0037493333220481873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,64,0.003425066669782003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,32,0.0034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,8192,0.037281068166097005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,10240,0.04374186595280965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,16384,65536,0.5693248112996419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,12288,0.05787306626637777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,12288,65536,0.4095328013102214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,16384,0.07337066332499186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,7168,0.032129067182540896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,5120,0.026280534267425538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,3584,0.01967573364575704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,4096,0.020181334018707274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,3072,0.017288533846537273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,6144,0.027746133009592694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,2560,0.013851733009020487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,1536,0.009463466207186381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,768,0.00653653343518575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,1024,0.007436800003051758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,512,0.0047189335028330484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,256,0.0038602667550245917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,2048,0.011396267016728719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,128,0.003420799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,64,0.0032042667269706728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,32,0.0034517332911491393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,10240,0.039400533835093184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,10240,65536,0.36390933990478513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,8192,0.031752532720565795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,7168,0.028223999341328937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,12288,0.04537920157114665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,16384,0.07012800375620523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,6144,0.024684800704320272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,3584,0.01604586640993754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,4096,0.01844266653060913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,5120,0.02332800030708313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,2560,0.013185066978136697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,3072,0.014327466487884521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,2048,0.01032426655292511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,512,0.004244266450405121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,768,0.006543999910354615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,256,0.0037216000258922578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,1024,0.006922666728496551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,1536,0.008614400029182434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,128,0.0033258666594823206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,64,0.0031786667803923286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,32,0.003306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,8192,65536,0.27406508127848306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,8192,0.028619732459386187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,10240,0.034406399726867674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,7168,0.025066665808359784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,12288,0.03996586799621582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,16384,0.05975893338521322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,6144,0.02387626568476359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,4096,0.016590933005015053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,5120,0.019256534179051717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,3584,0.014454399545987448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,1536,0.007994666695594788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,2560,0.011065600315729777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,3072,0.012633599837621055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,2048,0.009505066275596618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,1024,0.006379733482996623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,768,0.00543146679798762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,512,0.004163199911514918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,256,0.003636266787846883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,64,0.0030762667457262674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,128,0.0032778667906920114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,32,0.0032266666491826378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,7168,0.022411733865737915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,7168,65536,0.25920106569925944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,8192,0.030558933814366657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,10240,0.03604480028152466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,12288,0.041050668557484946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,16384,0.051482665538787845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,6144,0.0196341335773468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,2560,0.0099263995885849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,5120,0.018337066968282065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,3072,0.011218133568763732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,4096,0.01446613371372223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,3584,0.01267519990603129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,1536,0.0073749333620071415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,2048,0.008644266923268636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,768,0.004509866734345754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,1024,0.005815466741720835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,128,0.0032469332218170166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,512,0.0039061332742373147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,256,0.0035274667044480645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,64,0.0030730667213598887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,32,0.003078400095303853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,6144,65536,0.2050922711690267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,8192,0.024980266888936363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,7168,0.02228800058364868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,10240,0.028402133782704668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,16384,0.04104426701863607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,12288,0.033497599760691325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,6144,0.021432532866795858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,5120,0.01800000071525574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,3072,0.009874133268992107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,3584,0.011026133100191753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,2560,0.008584533135096233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,4096,0.012520533800125123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,1024,0.004957866668701172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,2048,0.00766186664501826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,1536,0.006579199930032094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,768,0.004129066566626231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,5120,65536,0.18245760599772137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,512,0.0037077332536379496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,128,0.0031146667897701263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,256,0.0034058667719364167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,64,0.0029418667157491045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,32,0.003159466634194056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,6144,0.01848213275273641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,10240,0.02579733331998189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,8192,0.02232746680577596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,7168,0.020036266247431437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,12288,0.029155200719833373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,16384,0.04108373324076335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,4096,0.010796800255775452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,5120,0.016532267133394875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,3584,0.00986240009466807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,3072,0.00893440047899882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,1536,0.006353066861629486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,2048,0.007624533275763195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,2560,0.008134399851163227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,4096,65536,0.14457066853841144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,1024,0.004830933113892873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,768,0.004147200038035711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,256,0.0033546666304270422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,512,0.003688533355792364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,128,0.003124266614516576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,32,0.002998399982849757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,64,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,12288,0.02573546568552653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,8192,0.01987839937210083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,7168,0.017605332533518474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,10240,0.022674133380254112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,6144,0.01591146687666575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,5120,0.01416213313738505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,16384,0.032048000892003374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,2048,0.007057066758473713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,4096,0.012410666545232136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,3072,0.010878933469454448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,3584,0.011576533317565918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,2560,0.007814399898052216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3584,65536,0.1321290651957194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,1536,0.006067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,1024,0.004637866715590159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,768,0.00413973331451416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,512,0.003676799933115641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,256,0.0033503999312718712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,128,0.003033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,64,0.00296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,32,0.0030080000559488933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,6144,0.013931733369827271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,10240,0.019946666558583577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,8192,0.01707306702931722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,12288,0.022749867041905722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,7168,0.01548373301823934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,16384,0.02828906575838725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,3072,65536,0.11084799766540528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,5120,0.012481066584587096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,1536,0.00572266678015391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,2048,0.006838400165239971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,4096,0.011494400103886922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,3584,0.009128533800443013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,3072,0.008358400066693623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,2560,0.007565866907437642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,768,0.004065066576004028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,1024,0.0044149334232012425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,512,0.0036778666079044344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,256,0.003349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,128,0.0030346666773160298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,64,0.002977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,32,0.002915200094381968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2560,65536,0.09265173276265462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,6144,0.011988266309102377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,7168,0.01346879998842875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,5120,0.011806933085123698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,12288,0.019250132640202842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,8192,0.014722133676211039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,10240,0.01680213411649068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,16384,0.023575466871261597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,3072,0.008391466736793519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,4096,0.010134399930636088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,3584,0.0090421328941981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,2560,0.007399466633796692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,2048,0.0066890666882197065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,1024,0.0044149334232012425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,1536,0.005342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,768,0.004071466624736786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,512,0.0035743998984495797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,256,0.0032458665470282235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,128,0.0030613332986831666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,64,0.0029290666182835894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,32,0.002979200085004171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,16384,0.019692800442377725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,2048,65536,0.07591786384582519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,8192,0.012040533622105916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,10240,0.013767466942469279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,7168,0.011054933071136475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,6144,0.010393599669138592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,12288,0.01529813309510549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,5120,0.010077866911888122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,4096,0.009709866841634114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,3584,0.00958720048268636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,3072,0.007747200131416321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,1024,0.004294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,2048,0.005731200178464254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,2560,0.007000533243020375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,1536,0.005116799970467886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,768,0.003986133386691412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,512,0.0035573333501815797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,128,0.0029898665845394133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,256,0.003205333401759466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,64,0.0028533334533373516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,65536,0.06055999994277954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1536,32,0.0029333333174387617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,10240,0.01132480005423228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,12288,0.012339199582735699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,16384,0.01474453310171763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,7168,0.010075733065605164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,8192,0.010311466455459595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,6144,0.009379200140635173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,4096,0.008100266754627227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,5120,0.008805333574612936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,3584,0.008117333551247915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,3072,0.007322666545708974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,1024,0.004203733305136363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,1536,0.005063466727733612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,2560,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,2048,0.005599999924500784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,512,0.003566933423280716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,768,0.0038560000558694207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,65536,0.04089386860529582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,128,0.00297173336148262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,256,0.0032000000278155005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,64,0.0028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,1024,32,0.003197866678237915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,8192,0.011075199643770853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,16384,0.012176000078519185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,7168,0.009591466188430786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,12288,0.01045973300933838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,10240,0.01001706620057424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,6144,0.008966400225957235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,4096,0.007341866691907247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,5120,0.008605866630872091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,3072,0.007063466807206471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,3584,0.007837866743405659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,2560,0.006458666423956554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,2048,0.005604266623655955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,1536,0.004977066814899444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,65536,0.03487039804458618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,1024,0.004309333364168803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,512,0.003483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,768,0.0038986665507157645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,256,0.0031626666585604347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,128,0.0029130667448043824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,32,0.002885333448648453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,768,64,0.0027647999425729113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,12288,0.009514666597048442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,16384,0.010712533195813497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,8192,0.008687999844551087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,10240,0.009057066837946574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,4096,0.007621333499749501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,3584,0.007309866448243459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,7168,0.008007466793060303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,6144,0.007160533467928569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,5120,0.0072170664866765336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,3072,0.006934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,65536,0.025685334205627443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,2560,0.006140799820423126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,2048,0.005448533097902933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,1536,0.004782933493455251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,768,0.003735466549793879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,1024,0.004073599974314371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,256,0.0030591999491055804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,512,0.0034122665723164878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,32,0.0027893332143624624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,128,0.002922666569550832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,512,64,0.0027413333455721537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,7168,0.006986666719118755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,10240,0.007921066880226136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,8192,0.007229866584142049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,12288,0.008912000060081481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,16384,0.010275200009346008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,6144,0.006693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,65536,0.016530133287111917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,5120,0.007133866846561432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,4096,0.0067071999112765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,3584,0.007097599903742473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,3072,0.006743466854095459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,2048,0.00537066658337911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,2560,0.006072533130645752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,512,0.0033503999312718712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,1024,0.003975466638803482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,1536,0.004711466530958811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,768,0.0036277333895365395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,256,0.0029696000119050344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,16384,0.007620266576608021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,128,0.0028319999575614927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,64,0.002647466709216436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,256,32,0.0026805333793163298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,12288,0.007005866865317028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,8192,0.006982400019963582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,10240,0.007003733515739441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,65536,0.012570666273434958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,7168,0.0067669332027435304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,6144,0.006472533444563548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,5120,0.006922666728496551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,4096,0.0065194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,3072,0.006638933221499126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,1536,0.004673066735267639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,2048,0.005379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,3584,0.0069461335738499955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,2560,0.006009600063165029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,128,0.0028223998844623564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,1024,0.0039658665657043455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,768,0.0036320000886917113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,256,0.0029333333174387617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,512,0.003256533294916153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,64,0.002700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,128,32,0.0026975999275843303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,16384,0.00676800012588501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,65536,0.011285332838694255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,8192,0.0067221333583196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,6144,0.00639466643333435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,10240,0.006797866523265838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,12288,0.006629333396752675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,7168,0.006459733347098033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,5120,0.006814933319886525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,4096,0.006384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,2560,0.005982933441797892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,3072,0.006578133503595989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,3584,0.006874666611353557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,2048,0.005276800195376078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,1536,0.004569600025812784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,1024,0.00393599991997083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,768,0.003588266670703888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,256,0.0030602666238943735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,512,0.003223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,128,0.002771199991305669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,64,0.002656000107526779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,64,32,0.0026880001028378804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,10240,0.006761600077152252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,16384,0.007377066711584728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,8192,0.006603733201821645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,12288,0.006640000144640605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,7168,0.006417066852251689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,65536,0.015636266271273295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,4096,0.006371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,6144,0.006300800045331319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,5120,0.006706133484840393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,3072,0.006422399977842967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,2048,0.005267199873924255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,3584,0.006743466854095459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,2560,0.005919999877611796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,1536,0.0046015997727712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,1024,0.004026666780312856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,256,0.0029386666913827257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,768,0.003550933301448822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,512,0.0032437334458033243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,128,0.002700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,64,0.002631466587384542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,80,32,32,0.0026208000878492994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,4096,0.14724159240722656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,5120,0.18392960230509442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,6144,0.2180405298868815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,7168,0.25354026158650717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,8192,0.28455893198649085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,3584,0.1295093297958374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,10240,0.356329600016276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,3072,0.11310826937357585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,2560,0.10126187006632488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,2048,0.08590186436971028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,256,0.01458560029665629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,1536,0.06637866497039795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,768,0.035420799255371095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,512,0.02553279995918274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,1024,0.04505386749903361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,64,0.007142400244871776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,128,0.008803199728329976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,32,0.006134399771690368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,12288,0.42855253219604494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,7168,0.058273065090179446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,8192,0.06650880177815756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,6144,0.05721386671066284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,10240,0.08247466882069907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,12288,0.09712639649709066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,3584,0.03195093274116516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,4096,0.03602879842122396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,3072,0.03014613389968872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,16384,0.13712107340494792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,5120,0.04329066673914592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,2560,0.02603413263956706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,2048,0.019972266753514607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,768,0.010069333513577779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,1536,0.015954132874806723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,1024,0.011378133296966552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,512,0.007899733384450276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,128,0.00401706670721372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,256,0.004844800134499868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,64,0.003676799933115641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,65536,16384,0.5651914596557617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,32,0.003788800040880839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,6144,0.040940801302591964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,7168,0.04978239933649699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,8192,0.05402026573816935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,10240,0.06704959869384766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,12288,0.07730666796366373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,2560,0.019509333372116088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,4096,0.027803732951482134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,16384,0.10795626640319825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,3584,0.02558079957962036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,5120,0.03464959859848023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,3072,0.022765866915384927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,1024,0.010589866836865743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,2048,0.017646932601928712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,768,0.008167466521263123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,512,0.006612266600131989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,256,0.004166399935881296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,1536,0.012455466389656066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,64,0.0033333333830038703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,128,0.0036202666660149893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,32,0.003369600077470144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,10240,0.052988799413045254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,7168,0.03786453406016032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,8192,0.04257386525472005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,12288,0.061610666910807285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,16384,0.08047680060068765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,6144,0.03305280009905497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,4096,0.023499733209609984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,5120,0.028455466032028198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,3584,0.021129600207010903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,2560,0.016662399967511496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,3072,0.019511467218399046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,1536,0.011366400122642516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,1024,0.008659199873606364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,2048,0.014485333363215128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,768,0.007177599767843883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,512,0.005793066819508871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,256,0.0036949334045251214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,128,0.003385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,32,0.0032127998769283296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,64,0.0031178665657838186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,16384,65536,0.5773653030395508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,8192,0.037305601437886554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,7168,0.035239466031392414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,10240,0.043525334199269614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,12288,0.05593066612879435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,16384,0.07164053122202554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,4096,0.020660267273585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,5120,0.02479893366495768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,6144,0.028778666257858278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,3584,0.017895466089248656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,12288,65536,0.40388905207316084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,2048,0.0114656001329422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,2560,0.013864533106486002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,3072,0.016314666469891867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,768,0.00692799985408783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,1536,0.010105599959691364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,1024,0.007778133451938629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,512,0.004408533374468485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,256,0.0038176000118255613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,64,0.0032448001205921174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,128,0.0034453332424163817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,32,0.003294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,7168,0.027778132756551104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,12288,0.04495893319447835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,8192,0.031975466012954715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,10240,65536,0.3398687998453776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,10240,0.039000535011291505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,16384,0.07232213020324707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,3072,0.01402239998181661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,5120,0.02144533395767212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,3584,0.01730560064315796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,2560,0.013450666268666586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,4096,0.01813439925511678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,6144,0.024488532543182374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,768,0.0061247999469439185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,2048,0.0107424000898997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,1536,0.008777599533398945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,1024,0.007469866673151653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,512,0.004241066674391428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,256,0.0036778666079044344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,128,0.003387733300526937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,64,0.0031946666538715364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,32,0.0031968000034491217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,7168,0.02474453250567118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,8192,65536,0.2681023915608724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,10240,0.03476053476333618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,8192,0.02868586579958598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,12288,0.03997333447138469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,4096,0.01652479966481527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,16384,0.059205333391825356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,6144,0.021960532665252684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,5120,0.02064853310585022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,3584,0.014569600423177084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,2560,0.011206400394439698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,3072,0.012770133217175803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,1536,0.00812906672557195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,2048,0.009611733754475911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,1024,0.006781866649786632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,512,0.003990400085846583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,768,0.005353599786758423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,128,0.003253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,256,0.003566933423280716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,64,0.003045333425203959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,32,0.0031082667410373688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,7168,65536,0.24681493441263833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,12288,0.039691734313964847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,10240,0.034405334790547686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,7168,0.021499733130137123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,8192,0.024022400379180908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,16384,0.050069332122802734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,6144,0.020485333601633706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,5120,0.016701867183049522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,3072,0.010830932855606079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,4096,0.014035200079282125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,3584,0.012228266398111979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,2560,0.009681066870689392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,1536,0.007102933526039123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,2048,0.008313600222269695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,128,0.0032757334411144257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,1024,0.005915733178456625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,768,0.0044053331017494205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,256,0.0034346667428811393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,512,0.0037439999481042228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,64,0.002985599885384242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,32,0.0030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,6144,65536,0.20063254038492837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,7168,0.018131200472513834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,12288,0.03190933267275493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,10240,0.028354134162267047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,8192,0.0237119992574056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,6144,0.01708586613337199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,16384,0.0399402658144633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,5120,0.013982933759689332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,3584,0.010523733496665955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,3072,0.009542399644851684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,2560,0.008482133348782856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,2048,0.0074314668774604796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,4096,0.012310399611790975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,1536,0.006346666812896728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,5120,65536,0.17666239738464357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,64,0.002976000060637792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,256,0.003303466737270355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,1024,0.0050016000866889955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,768,0.004081066697835922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,512,0.0036373332142829893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,128,0.003033600002527237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,32,0.0029845332105954488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,6144,0.017579734325408936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,7168,0.019615999857584634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,8192,0.022456532716751097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,10240,0.02453440030415853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,12288,0.028542933861414592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,16384,0.03761813243230184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,5120,0.015787733594576518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,4096,0.01095360020796458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,3584,0.009660800298055012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,1536,0.006208000083764395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,3072,0.008835200468699138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,2560,0.007962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,2048,0.007037866612275441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,512,0.0036042665441830954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,4096,65536,0.14158719380696613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,1024,0.004614399870236715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,768,0.00403413325548172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,256,0.0033439998825391137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,64,0.0028490667541821797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,128,0.0030026666820049284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,32,0.002959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,6144,0.015579733252525329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,8192,0.019335466623306274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,7168,0.017366399367650352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,12288,0.025091199080149333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,10240,0.02187733252843221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,5120,0.013778133193651834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,16384,0.03150293429692586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3584,65536,0.1217845360438029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,4096,0.01018453339735667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,2048,0.006886399785677592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,3584,0.009197866916656494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,3072,0.008513066172599792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,2560,0.007719466586907704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,1536,0.006037333110968272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,512,0.0036042665441830954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,1024,0.00448639988899231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,768,0.003996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,128,0.0030464000999927522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,256,0.0032586666444937387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,64,0.0028970666229724885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,32,0.002921599894762039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,16384,0.02792533238728841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,6144,0.013794133067131042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,7168,0.014975999792416891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,8192,0.016911999384562174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,10240,0.019319466749827065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,12288,0.02230506738026937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,5120,0.012447999914487202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,4096,0.009666132926940917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,3072,65536,0.10662079652150472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,3072,0.008191999793052674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,2560,0.007477333148320516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,3584,0.008835200468699138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,1536,0.0053845331072807315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,2048,0.006758399804433187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,1024,0.004311466713746389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,64,0.002919466545184453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,512,0.0035616000493367515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,768,0.003985066711902618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,256,0.0032661333680152893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,128,0.003010133405526479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,32,0.002921599894762039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2560,65536,0.10127039750417073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,7168,0.012916266918182373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,6144,0.012116266290346782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,16384,0.023075199127197264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,10240,0.016432000199953715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,8192,0.01430506706237793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,12288,0.018566399812698364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,3072,0.008844799796740214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,5120,0.011664000153541566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,3584,0.00923413336277008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,4096,0.009815466403961182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,2560,0.00709440012772878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,2048,0.006105599800745646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,1536,0.005107200145721436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,1024,0.004321066538492838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,768,0.003913599997758865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,64,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,128,0.0029781334102153777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,512,0.0034933333595593774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,256,0.003176533430814743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,32,0.002812800059715907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,16384,0.019598933060963948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,2048,65536,0.07274133364359538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,8192,0.012001066406567892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,7168,0.010710400342941285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,10240,0.013645866513252258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,6144,0.01039573351542155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,12288,0.015316266814867655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,5120,0.00999786655108134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,4096,0.009146666526794434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,1024,0.0041514667371908825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,3584,0.008770133058230083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,3072,0.00811839997768402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,2560,0.006828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,1536,0.004870399832725525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,2048,0.005694933235645294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,512,0.0034389334420363107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,768,0.003868799904982249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,65536,0.05857280095418295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,256,0.0031413334111372627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,32,0.002856533229351044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,128,0.0029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1536,64,0.0027583998938401537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,10240,0.011322666207949321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,12288,0.012307199835777282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,16384,0.014601600170135499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,8192,0.010247466961542766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,7168,0.009746133287747701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,6144,0.009033600489298504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,5120,0.008611200253168742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,4096,0.0074538667996724445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,3072,0.007156266768773396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,3584,0.007787733276685078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,2560,0.006287999947865804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,768,0.003824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,2048,0.005351466437180838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,1536,0.004732800026734671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,1024,0.004062933226426443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,65536,0.039877335230509445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,512,0.0034645333886146545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,256,0.0031925333042939507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,64,0.002808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,128,0.0029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,1024,32,0.0028629332780838014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,10240,0.00983786682287852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,12288,0.010456533233324686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,16384,0.012065066893895467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,7168,0.008430932958920796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,8192,0.008829866846402485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,6144,0.007934933404127757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,5120,0.007959466675917308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,4096,0.007124266525109608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,2560,0.006155733267466227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,3584,0.007570133109887441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,3072,0.006883200009663899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,2048,0.005510400235652924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,768,0.003740799923737844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,65536,0.033353598912556966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,1536,0.004779733220736186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,1024,0.004082133372624716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,512,0.00335359995563825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,256,0.0030933332939942675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,128,0.0028384000062942503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,32,0.0028405333558718365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,768,64,0.002780800064404805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,10240,0.008797867099444072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,16384,0.010309333602587383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,12288,0.009217066566149394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,6144,0.007110400001207988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,8192,0.008078933258851369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,7168,0.007569066683451335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,4096,0.006704000135262807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,5120,0.007123200098673503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,65536,0.02439039945602417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,3584,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,3072,0.006821333368619282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,1024,0.004031999905904134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,2560,0.006028800209363302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,2048,0.005362133185068766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,1536,0.004694400231043497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,768,0.003669333209594091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,512,0.003302400062481562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,128,0.0027818667391935987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,64,0.0027445333699385325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,256,0.003009066730737686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,512,32,0.0027530667682488757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,16384,0.008426666259765625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,10240,0.007107200225194295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,7168,0.006854400038719177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,12288,0.007109333574771881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,5120,0.00699946681658427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,8192,0.006937600175539653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,6144,0.0065760001540184024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,65536,0.01530880033969879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,2048,0.005373866856098175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,4096,0.006626133124033611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,3072,0.006796800096829732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,3584,0.00720000018676122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,2560,0.006054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,768,0.0036373332142829893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,1536,0.004693333307902018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,1024,0.003984000037113826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,512,0.003340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,128,0.0028362666567166646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,64,0.002628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,256,0.002974933385848999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,256,32,0.0027477333943049112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,16384,0.006793599824110668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,10240,0.006844800213972728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,12288,0.006714666883150737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,8192,0.0067658667763074234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,65536,0.010999466975529988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,7168,0.006605866551399231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,6144,0.006444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,3584,0.006900266806284587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,4096,0.006378666559855144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,5120,0.006821333368619282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,2560,0.006031999985376993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,3072,0.006528000036875407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,1536,0.004661333560943603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,2048,0.005345066885153452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,1024,0.003946666667858759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,768,0.00363520011305809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,512,0.0032874666154384612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,128,0.002958933264017105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,256,0.0029813334345817565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,32,0.0026922665536403658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,128,64,0.002639999985694885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,12288,0.006673066814740498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,16384,0.006753066678841908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,8192,0.006753066678841908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,65536,0.009388800462086995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,10240,0.006781866649786632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,7168,0.006523733337720235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,6144,0.006359466910362243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,5120,0.006674133241176605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,4096,0.006316799918810527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,3072,0.006524799764156342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,3584,0.0068256000677744556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,2560,0.00590826670328776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,2048,0.005224533379077911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,256,0.003032533327738444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,1024,0.0038975998759269714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,1536,0.004588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,512,0.00322026660044988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,768,0.0035317334036032355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,128,0.0027306665976842242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,64,0.002629333237806956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,64,32,0.002664533257484436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,10240,0.006842666864395141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,16384,0.006748799979686737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,12288,0.0067093332608540845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,65536,0.00997759997844696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,8192,0.006609066824118296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,7168,0.006479999919732411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,4096,0.00631466656923294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,6144,0.006374399860699971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,3584,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,5120,0.006724266707897187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,2560,0.006002133091290792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,3072,0.00645653357108434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,2048,0.0052490666508674625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,1024,0.0038965334494908653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,1536,0.004583466549714406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,128,0.002834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,512,0.0032127998769283296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,768,0.0035487999518712364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,256,0.002915200094381968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,64,0.002624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,64,32,32,0.002624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,4096,0.13659839630126952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,5120,0.16921067237854004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,6144,0.2003317356109619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,7168,0.233243735631307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,8192,0.26812267303466797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,3584,0.11902186870574952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,3072,0.10409706433614094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,2560,0.09438292980194092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,10240,0.33716373443603515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,2048,0.07926719983418783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,1024,0.04233706792195638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,1536,0.06087786753972372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,768,0.03304959932963054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,256,0.01341973344484965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,128,0.008330666522185007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,512,0.02387626568476359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,64,0.006410666803518932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,32,0.0055189331372578945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,12288,0.3957248051961263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,7168,0.0577184001604716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,8192,0.06513173182805379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,10240,0.08101759751637777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,12288,0.09570666948954264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,6144,0.0559989333152771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,65536,16384,0.5273781458536784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,16384,0.12797653675079346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,5120,0.04276373386383057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,4096,0.035176531473795576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,3584,0.03105066617329915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,768,0.00920853316783905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,3072,0.028492800394694012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,2560,0.02358186642328898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,512,0.007259733478228251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,2048,0.021412267287572225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,1536,0.017112533251444496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,1024,0.012449066837628682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,256,0.004509866734345754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,64,0.003455999990304311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,128,0.0037269333998362223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,32,0.003554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,6144,0.04141120115915935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,7168,0.04688106775283814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,8192,0.05399680137634277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,10240,0.06676692962646484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,12288,0.07302292982737223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,16384,0.10356266498565674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,5120,0.035035733381907144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,4096,0.028920533259709676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,3584,0.025364265839258833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,3072,0.02255786657333374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,2560,0.018985599279403687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,2048,0.017654399077097573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,512,0.006459733347098033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,128,0.0035455999275048576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,768,0.008006399869918824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,1024,0.00973653296629588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,1536,0.012827733159065246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,256,0.003966933240493139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,32,0.003339733431736628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,64,0.0032768001159032187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,8192,0.045324798425038657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,7168,0.03834240039189656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,10240,0.05418026844660441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,12288,0.072597336769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,16384,0.09277973175048829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,6144,0.035027201970418295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,5120,0.02863360047340393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,4096,0.02411839962005615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,3584,0.02129813234011332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,2560,0.016742400328318276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,1536,0.010886399944623312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,1024,0.008378666639328004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,2048,0.013733333349227906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,3072,0.01944213310877482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,768,0.00755626658598582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,512,0.005871999760468801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,256,0.003824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,128,0.0034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,32,0.003278933217128118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,64,0.00323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,10240,0.04312746524810791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,8192,0.03507733345031738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,16384,65536,0.5396031697591146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,12288,0.055062401294708255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,16384,0.07023573716481527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,12288,65536,0.40013653437296554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,7168,0.031172267595926922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,6144,0.02727893392244975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,4096,0.019858133792877198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,5120,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,3072,0.016743467251459757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,3584,0.017579734325408936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,2560,0.013530666629473368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,1536,0.009236266215642292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,1024,0.007259733478228251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,768,0.006257066627343495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,2048,0.011410133043924967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,512,0.00459199994802475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,256,0.0037216000258922578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,128,0.0033781332274278007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,32,0.003310933212439219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,64,0.0032106667757034303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,8192,0.03170453310012818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,7168,0.028228267033894854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,12288,0.04757973353068034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,10240,0.03888320128122966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,10240,65536,0.35422932306925453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,16384,0.05933866500854492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,4096,0.01832533280054728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,5120,0.02141973376274109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,6144,0.02467306653658549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,3072,0.015396266182263692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,3584,0.015899733702341715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,2048,0.01067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,2560,0.012989866733551025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,1024,0.006884266436100006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,1536,0.008705066641171773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,256,0.003607466568549474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,768,0.0059914668401082356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,512,0.0040501333773136135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,64,0.0030773334205150605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,128,0.003269333392381668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,32,0.0031669333577156065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,8192,65536,0.2658016045888265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,10240,0.03475840091705322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,8192,0.028780800104141236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,7168,0.0249834676583608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,12288,0.04034133354822795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,6144,0.023614933093388878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,16384,0.05271573464075724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,5120,0.019550933440526327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,4096,0.016586666305859886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,3584,0.01455573340257009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,1536,0.008362666765848795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,2560,0.011087999741236369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,3072,0.012973866860071816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,2048,0.00990613301595052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,512,0.004009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,256,0.003479466587305069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,1024,0.006469333171844482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,768,0.0053247998158137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,32,0.003047466774781545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,128,0.003186133255561193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,64,0.0029866665601730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,7168,65536,0.25102720260620115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,10240,0.03330453236897786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,7168,0.02137493292490641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,8192,0.023990400632222495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,12288,0.03814506530761719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,6144,0.020137600104014077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,5120,0.016459733247756958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,16384,0.048393599192301434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,3072,0.010931199789047242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,2048,0.00848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,3584,0.012177067001660664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,4096,0.014103466272354126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,2560,0.009612799684206644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,1536,0.007284266750017802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,768,0.0043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,1024,0.005868799984455109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,512,0.0037418665985266366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,256,0.0033791999022165934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,128,0.0031370667119820913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,32,0.0030826665461063385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,64,0.0029397333661715193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,6144,65536,0.201637331644694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,7168,0.020450133085250854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,8192,0.023834667603174844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,6144,0.019834667444229126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,10240,0.026552534103393553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,12288,0.030869332949320476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,3584,0.012971733013788858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,16384,0.03897279898325602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,3072,0.012007466952006022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,4096,0.014151466886202493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,5120,0.016365866859753928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,2048,0.007430399954319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,2560,0.008427733182907104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,1536,0.006428800026575724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,256,0.003443199892838796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,1024,0.004805333415667216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,768,0.003910399973392487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,512,0.003571200122435888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,5120,65536,0.1907423973083496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,128,0.002951466788848241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,32,0.003036800026893616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,64,0.0028181334336598715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,7168,0.019370667139689126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,12288,0.027970133225123088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,6144,0.01690666675567627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,8192,0.021036799748738608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,10240,0.025998934110005693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,16384,0.03496533234914144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,5120,0.015389866630236306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,2048,0.007043200234572093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,4096,0.013153066237767538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,3584,0.01206933359305064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,3072,0.011186133821805317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,2560,0.008102400104204814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,4096,65536,0.1387285391489665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,1024,0.004428799947102865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,1536,0.00625493327776591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,768,0.0039327998956044516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,512,0.003565866748491923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,128,0.002976000060637792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,256,0.0032032000521818793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,32,0.002930133293072383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,64,0.0029045333464940387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,7168,0.01736746629079183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,8192,0.019074134031931558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,6144,0.01548693378766378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,10240,0.021746132771174112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,12288,0.02504853407541911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,5120,0.013834666212399802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,16384,0.031464533011118574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,2048,0.006877866884072621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3584,65536,0.1249674638112386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,4096,0.012302933136622111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,2560,0.007632000247637431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,1536,0.005947733422120413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,3584,0.011368532975514729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,3072,0.010371200243631999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,1024,0.004285866518815359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,768,0.003931733220815659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,256,0.0032469332218170166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,128,0.0029994666576385496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,512,0.0035391998787721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,64,0.002899199972550074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,32,0.0030847998956839246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,6144,0.013281066219011942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,10240,0.01911679903666178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,7168,0.014859732985496522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,8192,0.016356266538302102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,12288,0.022100265820821127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,3072,65536,0.10570027033487957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,16384,0.027454932530721027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,5120,0.012167466680208842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,2048,0.006934399902820587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,4096,0.010790399710337321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,2560,0.0073738664388656614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,3584,0.01009173293908437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,3072,0.009551999966303508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,768,0.003933866570393244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,1536,0.005434666574001312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,1024,0.004221866528193155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,512,0.0035264000296592714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,256,0.003234133372704188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,32,0.0029781334102153777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,128,0.0029706666866938275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,64,0.002762666592995326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2560,65536,0.09868799845377604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,8192,0.01423679987589518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,7168,0.01322773297627767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,6144,0.012294399738311767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,12288,0.018357332547505698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,10240,0.016053332885106405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,5120,0.011788800358772278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,16384,0.022397865851720176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,4096,0.010035199920336406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,3584,0.00858133335908254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,3072,0.00791786660750707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,1536,0.0051125332713127134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,2048,0.006333866715431213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,2560,0.007187200089295705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,1024,0.0041514667371908825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,768,0.003949866692225138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,128,0.0029813334345817565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,512,0.0034773332377274835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,64,0.0028223998844623564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,256,0.0031360000371932983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,32,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,2048,65536,0.07128533522288004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,10240,0.013539200027783712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,12288,0.01530346671740214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,8192,0.011877333124478657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,16384,0.019082667430241902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,7168,0.011098666985829671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,6144,0.01034346620241801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,3584,0.00865600009759267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,5120,0.009528533617655436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,3072,0.007649066547552745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,2560,0.006502399841944377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,4096,0.008851200342178345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,2048,0.00555733342965444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,1536,0.004894933104515076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,1024,0.00418453315893809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,256,0.003123199939727783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,512,0.0034677334129810332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,768,0.003824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,128,0.0029728000362714132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,64,0.0027690666417280836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,65536,0.05804479916890463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1536,32,0.002868266652027766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,16384,0.01474240024884542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,10240,0.011403733491897583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,12288,0.012409599622090657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,8192,0.010341333349545796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,7168,0.00931946635246277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,4096,0.00823466678460439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,5120,0.009401599566141765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,6144,0.010100266337394715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,3584,0.008107733229796093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,3072,0.007288533449172974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,1536,0.004829866687456766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,2560,0.006492800017197926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,2048,0.005490133166313171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,1024,0.004113066693147024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,65536,0.038599467277526854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,768,0.003765333443880081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,128,0.002850133428970973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,512,0.0034101332227389016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,256,0.0030517332255840302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,64,0.0028192001084486645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,1024,32,0.0028064000109831494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,10240,0.00990826686223348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,16384,0.011934933066368104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,8192,0.009087999661763508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,7168,0.008436266581217449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,12288,0.010114133358001709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,4096,0.007166933516661327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,3584,0.007390933235486348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,6144,0.008804266651471455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,5120,0.00819413314263026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,3072,0.006814933319886525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,2560,0.006105599800745646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,2048,0.005467733244101206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,1536,0.004816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,65536,0.03276159962018331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,1024,0.00409706657131513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,256,0.003065599997838338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,512,0.003403733422358831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,128,0.002846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,32,0.0028160000840822858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,768,0.0037493333220481873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,768,64,0.002738133321205775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,16384,0.010214400291442872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,7168,0.007410133381684621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,12288,0.009839999675750732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,8192,0.008106666803359985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,10240,0.009112532933553059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,6144,0.006954666475454967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,5120,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,3584,0.007099733253320058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,65536,0.023729066054026283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,4096,0.006702933212121327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,2560,0.006044800082842508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,3072,0.006778666873772939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,768,0.0036799999574820197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,2048,0.005354666709899902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,1536,0.004713599880536398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,1024,0.003992533435424169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,256,0.0030432000756263735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,512,0.0033215999603271483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,32,0.002716800073782603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,128,0.002834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,512,64,0.0026730666557947796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,7168,0.006798933446407318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,16384,0.008566400408744812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,10240,0.007079466680685679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,12288,0.007228800157705943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,8192,0.006906666855017345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,6144,0.006502399841944377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,5120,0.00697813332080841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,65536,0.015294933319091797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,4096,0.006579199930032094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,3072,0.00664213349421819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,3584,0.007022933165232341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,1536,0.0046517332394917805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,2560,0.006027733286221823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,2048,0.005369600156943003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,512,0.003305600086847941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,1024,0.003974399964014689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,768,0.0036309334139029183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,256,0.0030069333811601003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,64,0.0026591998835404714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,16384,0.006836266815662384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,128,0.002829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,256,32,0.002681600054105123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,65536,0.01125866671403249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,12288,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,6144,0.006486399968465169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,10240,0.006828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,8192,0.0067114666104316715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,7168,0.006632533172766368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,5120,0.006839466591676076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,4096,0.00647573322057724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,2560,0.0060245335102081295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,3072,0.0065301333864529925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,3584,0.00690773328145345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,2048,0.005348266661167144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,1536,0.004649599889914194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,1024,0.003979733337958654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,256,0.0030752000709374744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,768,0.0036138666172822317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,512,0.0032672000428040824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,64,0.0026687999566396077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,12288,0.006697600086530049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,128,0.0027701333165168762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,16384,0.006635733445485433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,128,32,0.002624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,65536,0.009818666179974874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,10240,0.006856533388296763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,8192,0.006631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,6144,0.006364800035953522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,7168,0.006425599753856659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,4096,0.0062837332487106325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,5120,0.006726400057474773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,3584,0.0067658667763074234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,1536,0.004615466793378194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,3072,0.0064522668719291685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,2560,0.005905066430568695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,2048,0.005226666728655497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,1024,0.0038933334251244865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,768,0.0035274667044480645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,128,0.0028245332340399425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,512,0.003223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,256,0.0029109333952267963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,64,0.0025888000925381976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,64,32,0.0026602665583292644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,10240,0.006638933221499126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,16384,0.006596266726652781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,12288,0.0065653334061304735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,65536,0.01033066709836324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,7168,0.006358399987220764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,6144,0.00626453310251236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,8192,0.006539733211199443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,4096,0.00626986672480901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,5120,0.006772266825040181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,3072,0.006422399977842967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,3584,0.006650666892528534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,768,0.0036245333651701607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,2048,0.005230933427810669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,1536,0.004598399996757508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,2560,0.005910400052865346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,1024,0.0038730666041374207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,64,0.002746666719516118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,256,0.0029056000212828318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,512,0.0032042667269706728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,128,0.0027327999472618104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,48,32,32,0.0025770666698614756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,4096,0.1356554667154948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,5120,0.16804265975952148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,6144,0.2001567999521891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,7168,0.2318730672200521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,8192,0.2684757232666016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,3584,0.11806613604227703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,3072,0.10256746610005696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,2560,0.09329813321431478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,1536,0.060140800476074216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,2048,0.07923519611358643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,512,0.02314773400624593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,256,0.011965866883595784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,768,0.03208426634470622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,10240,0.3303167978922526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,128,0.007855999966462452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,1024,0.041637333234151204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,32,0.00508480022350947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,64,0.005393066505591074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,12288,0.3940160115559896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,7168,0.05752533276875814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,8192,0.06614506642023722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,10240,0.08102400302886963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,12288,0.09567786852518717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,6144,0.05600853363672892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,4096,0.03540906508763631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,65536,16384,0.5247082710266113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,5120,0.042789332071940106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,16384,0.14169599215189616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,3584,0.03231786688168843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,3072,0.02985493342081706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,2048,0.019388800859451293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,2560,0.023321600755055745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,1024,0.012338133653004964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,1536,0.016777600844701132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,768,0.009966933727264404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,512,0.007063466807206471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,128,0.0036127999424934386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,256,0.004167466859022776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,64,0.0032927999893824257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,32,0.0034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,6144,0.04196266730626424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,7168,0.04535679817199707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,8192,0.05469760100046793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,10240,0.06792960166931153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,12288,0.07969280083974203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,3072,0.02257706721623739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,4096,0.027550933758417766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,3584,0.0255295991897583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,5120,0.03560320138931274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,16384,0.10650239785512287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,2560,0.020103466510772706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,2048,0.016203733285268147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,1024,0.009489066402117411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,768,0.007913599908351897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,1536,0.012283733487129212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,128,0.003403733422358831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,512,0.006728533407052357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,256,0.0038133333126703895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,64,0.0032117334504922234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,32,0.0031797334551811217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,7168,0.038737066586812335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,8192,0.04374399979909261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,10240,0.054332800706227626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,12288,0.07048213481903076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,16384,0.09067946275075277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,5120,0.02900693416595459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,6144,0.03387413422266643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,4096,0.023947733640670776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,3584,0.021513599157333373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,3072,0.02068586746851603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,768,0.007380266487598419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,1024,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,1536,0.010812800129254658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,2560,0.01618666648864746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,2048,0.013393066326777139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,512,0.005740800003210703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,128,0.0032842665910720824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,256,0.0036042665441830954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,64,0.003053866575161616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,32,0.003070933371782303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,8192,0.0344703992207845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,10240,0.04225813150405884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,12288,0.053928534189860024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,16384,65536,0.5380277633666992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,16384,0.06942719618479411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,12288,65536,0.39967041015625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,6144,0.02690453330675761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,7168,0.030804266532262165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,4096,0.021243733167648316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,5120,0.023194666703542074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,3072,0.016708266735076905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,3584,0.017621332406997682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,768,0.006116266548633576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,2560,0.013367467125256858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,1024,0.007214933137098948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,2048,0.011086933811505635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,1536,0.009215999643007915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,256,0.0035904000202814737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,512,0.004259199897448222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,128,0.0032586666444937387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,64,0.0030261332790056865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,32,0.0031167998909950255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,10240,65536,0.36825920740763346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,7168,0.028076799710591634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,8192,0.03136853377024333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,10240,0.038738131523132324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,12288,0.045297066370646163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,16384,0.059127465883890784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,2560,0.012063999970753986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,5120,0.021230934063593547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,3584,0.015870933731396995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,3072,0.014018133282661438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,6144,0.02661120096842448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,4096,0.017758933703104655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,2048,0.010339200496673584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,128,0.003160533308982849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,1536,0.008692266543706258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,1024,0.006844800213972728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,512,0.0039018665750821433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,768,0.005853866537412008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,256,0.0034453332424163817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,64,0.0029525332152843474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,32,0.0030080000559488933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,8192,65536,0.2653418699900309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,8192,0.02843093276023865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,10240,0.035076268513997394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,7168,0.02518613338470459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,12288,0.040491731961568196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,16384,0.050220799446105954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,5120,0.019432532787322997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,6144,0.02423786719640096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,4096,0.016360533237457276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,2048,0.00976213316122691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,3584,0.014525866508483887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,2560,0.010870400071144103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,1536,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,3072,0.01291306714216868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,768,0.005188266436258951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,1024,0.006391466657320659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,512,0.003853866706291834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,128,0.0032277333239714304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,256,0.003357866654793421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,64,0.0029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,32,0.002994133283694585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,7168,65536,0.24846720695495605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,7168,0.020924800634384157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,8192,0.023733333746592204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,12288,0.03710720141728719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,10240,0.032332799832026166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,5120,0.01616853376229604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,6144,0.020151466131210327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,16384,0.04733866850535075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,3072,0.010795733332633973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,3584,0.012503467003504434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,2560,0.009571199615796406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,4096,0.013766400019327798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,2048,0.008411733309427898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,1536,0.007211733361085255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,768,0.0045045331120491024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,1024,0.00574186642964681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,512,0.0036629334092140196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,256,0.0032885332902272543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,128,0.0030752000709374744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,32,0.002958933264017105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,64,0.002899199972550074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,6144,65536,0.19349013964335124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,8192,0.0230730672677358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,7168,0.020040533939997354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,16384,0.03782399892807007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,10240,0.025931733846664428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,6144,0.019220266739527384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,12288,0.030130134026209517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,3584,0.012651733557383218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,5120,0.01598186691602071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,3072,0.011437867085138958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,2560,0.008267733454704284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,4096,0.013834666212399802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,1536,0.006428800026575724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,2048,0.007383466760317485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,128,0.003014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,1024,0.004695466657479604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,512,0.0035082665582497918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,256,0.003235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,768,0.0038453333079814913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,64,0.002791466563940048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,5120,65536,0.18809386889139812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,32,0.002926933268706004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,6144,0.016645333170890807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,7168,0.019688532749811808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,12288,0.027350399891535444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,10240,0.023822933435440063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,8192,0.020142932732899986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,16384,0.03441813389460246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,5120,0.01461120049158732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,3584,0.011884799599647522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,1536,0.006150400141874949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,4096,0.012797866264979044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,3072,0.01092906693617503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,2560,0.008017066617806752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,2048,0.0070250665148099255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,4096,65536,0.13570559819539388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,768,0.003852800031503042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,1024,0.004540800054868063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,512,0.0034805332620938623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,256,0.0031701333820819853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,64,0.0028405333558718365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,128,0.0028970666229724885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,32,0.0029077333708604175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,6144,0.014819199840227762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,8192,0.018557866414388023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,10240,0.021945599714914957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,7168,0.017034665743509928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,12288,0.025038933753967284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,16384,0.031228800614674885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,5120,0.0132341335217158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,4096,0.011979732910792034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3584,65536,0.1262399991353353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,3584,0.011229866743087768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,3072,0.010311466455459595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,2048,0.006837333242098491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,2560,0.007648000121116638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,768,0.0038794666528701783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,1536,0.0058794667323430385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,1024,0.0041685332854588825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,512,0.003502933432658514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,256,0.0031317333380381264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,128,0.0029493334392706556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,32,0.0029397333661715193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,64,0.002845866729815801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,7168,0.014628266294797262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,6144,0.01318186620871226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,8192,0.0160480002562205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,10240,0.01872640053431193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,16384,0.028149332602818804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,3072,65536,0.10193920135498047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,12288,0.02137813369433085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,5120,0.011913599570592244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,4096,0.010802132884661357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,3584,0.010001066327095031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,1024,0.004163199911514918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,2560,0.007340799768765767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,2048,0.0068256000677744556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,3072,0.009355733791987102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,1536,0.0052906667192777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,256,0.003219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,768,0.0038058665891488397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,512,0.0034858666360378264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,64,0.0028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,128,0.002951466788848241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,32,0.002829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,7168,0.012882133324941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,8192,0.01392213304837545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,6144,0.011955199639002483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,12288,0.017691733439763387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,16384,0.02227733333905538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,10240,0.01571626663208008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2560,65536,0.09562453428904215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,4096,0.009826133648554485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,5120,0.01097706655661265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,2560,0.007158400118350982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,3072,0.008577066659927367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,1024,0.004129066566626231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,3584,0.00899839997291565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,2048,0.005985066791375478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,1536,0.004964266717433929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,768,0.003790933390458425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,128,0.0029098667204380036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,512,0.0034400001168251038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,256,0.003091199944416682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,64,0.0027744000156720476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,32,0.0029493334392706556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,2048,65536,0.06979093551635743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,10240,0.013327999909718832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,16384,0.018730666240056357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,12288,0.015050666530927024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,6144,0.01016213297843933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,8192,0.011809066931406657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,7168,0.010972799857457478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,4096,0.00874133308728536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,5120,0.009423999985059103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,3072,0.007853866616884867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,3584,0.0084906667470932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,2560,0.006449066599210103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,2048,0.005496533215045929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,1024,0.00408746674656868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,1536,0.004781866570313772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,512,0.0033642667035261786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,768,0.0037461332976818085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,32,0.002850133428970973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,128,0.0028351999819278715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,256,0.003147733211517334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,64,0.0027615999182065325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1536,65536,0.05664960145950317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,10240,0.01118293305238088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,7168,0.009177600344022114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,12288,0.012216533223787945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,8192,0.010071466366449993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,16384,0.014454399545987448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,6144,0.008298666775226593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,4096,0.007336533566315968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,5120,0.008386133114496867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,3584,0.007283199826876323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,3072,0.006849066913127899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,1024,0.004018133382002512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,2560,0.006077866752942403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,2048,0.005400533477465311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,1536,0.004691199958324432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,768,0.003671466559171677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,65536,0.03794240156809489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,128,0.002865066627661387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,512,0.0032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,32,0.002703999976317088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,256,0.002977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,1024,64,0.0027146667242050173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,10240,0.009725866715113322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,16384,0.01164906620979309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,12288,0.010326400399208069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,8192,0.008982400099436443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,7168,0.00844586690266927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,6144,0.007620266576608021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,5120,0.007585066556930542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,4096,0.006966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,2048,0.005383466680844625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,2560,0.006037333110968272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,3072,0.006897066533565521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,3584,0.007186133166154225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,65536,0.03208319942156474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,1536,0.004682666560014089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,1024,0.004013866682847341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,512,0.003310933212439219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,768,0.003673599908749262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,256,0.002998399982849757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,128,0.002796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,64,0.0027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,768,32,0.0027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,12288,0.008890666564305623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,10240,0.008453333377838134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,16384,0.010024533669153849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,8192,0.007619200150171916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,7168,0.007725866635640462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,6144,0.006676266590754191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,4096,0.006738133231798808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,3584,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,5120,0.0070783997575442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,65536,0.0229695995648702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,3072,0.006716800232728322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,2560,0.006025599936644236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,2048,0.0053610667586326596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,1024,0.003984000037113826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,768,0.003626666714747747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,1536,0.004664533336957296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,512,0.0032821332414944967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,256,0.002985599885384242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,64,0.002647466709216436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,128,0.002833066632350286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,512,32,0.0026506667335828146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,16384,0.008008533219496409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,12288,0.00695360004901886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,4096,0.006663466493288676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,10240,0.00705813318490982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,7168,0.006680533289909363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,8192,0.006855466465155284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,5120,0.007055999835332234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,65536,0.014851199587186179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,6144,0.006517333288987477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,3072,0.006676266590754191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,3584,0.0069354668259620665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,2048,0.005321600039800008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,2560,0.005997866888840993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,768,0.003623466690381368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,1536,0.004673066735267639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,1024,0.003952000041802724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,256,0.00296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,512,0.0032970666885375976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,64,0.0026709333062171934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,10240,0.006963199873765309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,128,0.0027722666660944624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,256,32,0.0027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,16384,0.0068234667181968685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,12288,0.006650666892528534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,65536,0.011180800199508668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,8192,0.006706133484840393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,6144,0.00660693347454071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,7168,0.006548266609509785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,5120,0.006858666737874349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,4096,0.006392533580462138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,2560,0.005981866518656413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,1536,0.004698666433493296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,3584,0.0068917334079742435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,3072,0.00651093324025472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,2048,0.005335466563701629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,1024,0.0039327998956044516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,256,0.002985599885384242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,128,0.0028864001234372456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,768,0.0035978667438030243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,512,0.0032245332996050516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,64,0.002628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,128,32,0.0026101333399613695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,16384,0.006774400174617767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,65536,0.010424533486366272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,12288,0.006775466601053874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,10240,0.0069567998250325514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,8192,0.006690133114655812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,7168,0.006533333162466686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,6144,0.006302933394908905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,4096,0.00622080018122991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,5120,0.006602666775385539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,3072,0.006389333307743073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,3584,0.006751999755700429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,2560,0.005958400170008342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,2048,0.005240533252557119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,256,0.0029418667157491045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,768,0.003521066655715307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,1024,0.003861333429813385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,1536,0.004556799928347269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,64,0.002737066646416982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,512,0.0031744000812371576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,128,0.0027306665976842242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,8192,0.006505600114663441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,16384,0.0064298664530118305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,12288,0.006833066542943318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,65536,0.009336533149083455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,64,32,0.0026069333155949908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,10240,0.006738133231798808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,7168,0.006243200103441874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,6144,0.006078933179378509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,5120,0.0065749332308769224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,4096,0.00598826656738917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,3584,0.0065194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,3072,0.006208000083764395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,1536,0.004542933404445648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,2560,0.0058794667323430385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,2048,0.0051818668842315676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,768,0.0035743998984495797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,1024,0.0038783999780813852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,128,0.0027253332237402597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,512,0.0031658666829268134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,256,0.002865066627661387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,64,0.002540799975395203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,32,32,32,0.0025472000241279604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,4096,0.13455039660135906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,5120,0.166702938079834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,6144,0.19806079864501952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,7168,0.2307530721028646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,8192,0.26269332567850745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,3584,0.11692159970601398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,3072,0.1015349308649699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,2560,0.0886250654856364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,1536,0.059146666526794435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,2048,0.07823146979014078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,256,0.011520000298817952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,10240,0.32797441482543943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,1024,0.04087893168131511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,64,0.004969599843025208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,512,0.022337067127227783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,128,0.007521066566308339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,768,0.031326933701833086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,32,0.004546133180459341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,12288,0.3996159871419271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,7168,0.0567957321802775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,8192,0.0643893321355184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,6144,0.055547734101613365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,10240,0.07954986890157065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,12288,0.09412053426106771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,3584,0.03056640028953552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,16384,0.12357760270436605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,5120,0.041943466663360594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,4096,0.03467520078023274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,3072,0.02946666677792867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,2560,0.025334399938583375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,768,0.008905599514643352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,1536,0.01644480029741923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,512,0.006917333106199901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,1024,0.012239999572436015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,2048,0.01917333404223124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,256,0.004056533426046371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,64,0.0031466667850812277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,128,0.003433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,32,0.003190399954716364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,65536,16384,0.523361078898112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,7168,0.04722453355789184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,10240,0.06736000378926596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,8192,0.053666134675343834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,6144,0.0452458659807841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,12288,0.07893866697947184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,16384,0.10407360394795735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,3072,0.022184532880783082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,4096,0.028439466158548993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,3584,0.025163733959198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,5120,0.0348746657371521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,2560,0.020194133122762047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,1024,0.009317333499590557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,512,0.006169599791367849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,768,0.0077237332860628765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,128,0.003289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,1536,0.01253546675046285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,2048,0.017180800437927246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,256,0.0037258667250474296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,64,0.0030602666238943735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,32,0.003049599876006444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,8192,0.04323840141296387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,10240,0.058210134506225586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,7168,0.038278400897979736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,12288,0.0679423967997233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,3584,0.022269866863886514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,16384,0.09219199816385905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,4096,0.023494400580724082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,5120,0.02863146662712097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,6144,0.03344853321711223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,3072,0.019527467091878255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,1024,0.00811413327852885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,1536,0.010702932874361675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,2560,0.01601066688696543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,2048,0.013247999548912048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,768,0.007281066477298736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,32,0.003018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,512,0.00553599993387858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,256,0.0034858666360378264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,128,0.003180799881617228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,64,0.0029685333371162414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,8192,0.03399146795272827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,10240,0.04169066747029622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,12288,0.05226879914601644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,16384,65536,0.5285557428995769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,16384,0.06718506813049316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,7168,0.03038933277130127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,5120,0.022959999243418374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,6144,0.026504532496134443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,12288,65536,0.39431145985921223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,4096,0.021157334248224892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,3584,0.017054933309555053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,1024,0.007113599777221679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,1536,0.009998933474222819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,768,0.006142933170000712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,3072,0.015194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,2560,0.013132799665133157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,2048,0.01097920040289561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,512,0.004026666780312856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,256,0.0034346667428811393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,128,0.0031221332649389905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,64,0.0029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,32,0.003028266628583272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,10240,65536,0.3747093200683594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,7168,0.030379732449849445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,6144,0.028857600688934327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,8192,0.03419946829477946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,10240,0.041041068236033124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,12288,0.0475872000058492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,16384,0.06113173166910807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,4096,0.01767146587371826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,3072,0.013843199610710144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,3584,0.015706666310628257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,2560,0.012005333105723064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,1536,0.009301333626111349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,5120,0.02089386582374573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,2048,0.010218666990598042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,1024,0.006702933212121327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,256,0.003332266708215078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,512,0.003751466671625773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,768,0.005840000013510386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,128,0.0031210665901501974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,64,0.0028970666229724885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,32,0.002916266769170761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,8192,65536,0.2638591925303141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,6144,0.023907200495402018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,7168,0.024654932816823325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,8192,0.026763733228047686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,10240,0.03442453145980835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,12288,0.03975679874420166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,16384,0.0517845352490743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,5120,0.018883200486501057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,2560,0.011000532905260723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,4096,0.016006400187810264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,3584,0.013666133085886637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,3072,0.01248426636060079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,2048,0.009566932916641235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,1536,0.007889066636562348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,64,0.002919466545184453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,1024,0.006279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,768,0.00487360010544459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,512,0.003689600030581156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,128,0.00306986669699351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,256,0.0033258666594823206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,32,0.0029696000119050344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,7168,65536,0.23416639963785807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,7168,0.02146240075429281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,8192,0.023285333315531412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,10240,0.03127893408139547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,12288,0.037460267543792725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,16384,0.04956053495407105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,6144,0.01992959976196289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,5120,0.01635840038458506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,3072,0.010700800021489461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,4096,0.013753599921862283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,1536,0.007228800157705943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,3584,0.012085333466529846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,2560,0.009434666236241658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,2048,0.008239999910195668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,512,0.003502933432658514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,1024,0.005645866692066193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,6144,65536,0.19363946914672853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,768,0.004231466849644979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,128,0.002980266759792964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,256,0.0032543999453385672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,64,0.002792533238728841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,32,0.002903466671705246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,6144,0.01767359972000122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,8192,0.022338134050369263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,7168,0.020348799228668214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,10240,0.026936533053716023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,12288,0.029174399375915528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,16384,0.040853333473205564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,5120,0.015682133038838704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,3072,0.011335466305414836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,3584,0.012442666292190551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,2048,0.0073632001876831055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,2560,0.00827519992987315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,4096,0.013398399949073792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,1536,0.006354133288065593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,512,0.0034783999125162757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,1024,0.004517333209514618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,768,0.0037813333173592886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,256,0.0031626666585604347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,128,0.002926933268706004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,32,0.0029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,64,0.002865066627661387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,5120,65536,0.19111146926879882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,7168,0.01843093236287435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,8192,0.019713066021601357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,6144,0.017163733641306557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,10240,0.02318933407465617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,12288,0.026787199576695758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,4096,0.01260373294353485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,16384,0.033928533395131424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,3072,0.010792533556620281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,3584,0.0116266667842865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,5120,0.014613333344459533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,2048,0.007034666836261749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,2560,0.00796693315108617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,4096,65536,0.13318933645884196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,1536,0.006097066899140676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,256,0.0031914666295051576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,1024,0.004301866888999939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,512,0.00344106654326121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,768,0.0038015998899936674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,128,0.0029258665939172106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,64,0.002807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,32,0.0028234665592511495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,12288,0.0243285338083903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,8192,0.018433066209157307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,10240,0.02127466599146525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,6144,0.014896000425020853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,16384,0.030618667602539062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,7168,0.01599360009034475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,5120,0.01316266655921936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3584,65536,0.1241322676340739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,4096,0.011808000008265178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,1536,0.005668266614278158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,3584,0.010917333761850993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,3072,0.010257066289583842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,2560,0.007681066791216533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,2048,0.006717866659164429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,512,0.0034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,1024,0.004226133227348328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,768,0.0037930667400360107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,256,0.0030901332696278887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,128,0.002932266642649968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,64,0.0027413333455721537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,32,0.00278613343834877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,6144,0.012893866499265036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,3072,65536,0.10599466959635417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,10240,0.018677333990732826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,8192,0.015884799758593242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,16384,0.027210666735966997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,12288,0.02175359924634298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,7168,0.014345600207646688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,5120,0.01160533328851064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,4096,0.011130666732788086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,3584,0.009902933239936828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,3072,0.009306666254997254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,2560,0.007289599875609081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,2048,0.006468266745408376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,1536,0.00526506652434667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,768,0.003773866593837738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,1024,0.004084266722202301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,512,0.0033845332761605583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,128,0.0028607999285062153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,256,0.003099733342727025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,64,0.0027744000156720476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,32,0.002812800059715907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2560,65536,0.09690346717834472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,7168,0.012763733665148417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,16384,0.021516799926757812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,6144,0.011876266201337178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,10240,0.0160778671503067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,12288,0.01751040021578471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,8192,0.013719466328620911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,5120,0.011452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,3584,0.008938666184743245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,4096,0.009618133306503296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,1024,0.0040522667268912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,3072,0.008469333251317341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,2560,0.007034666836261749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,2048,0.005841066439946493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,1536,0.004713599880536398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,256,0.0030741333961486817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,768,0.0036992001036802924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,64,0.0026805333793163298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,512,0.003389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,32,0.0027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,128,0.002868266652027766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,2048,65536,0.06794133186340331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,5120,0.00928106705347697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,10240,0.013099732995033263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,7168,0.01074026624361674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,8192,0.011592533191045125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,6144,0.00995733340581258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,16384,0.018292266130447387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,12288,0.01437226633230845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,4096,0.008611200253168742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,3584,0.008476799726486206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,1536,0.004728533327579498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,3072,0.007894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,2048,0.005402666827042898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,1024,0.003999999910593033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,2560,0.006286933521429698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,768,0.0037119999527931214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,512,0.003349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,256,0.003032533327738444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,32,0.0027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,65536,0.0551914652188619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,128,0.002773333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1536,64,0.002713600049416224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,10240,0.010955733060836793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,16384,0.014034133156140646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,12288,0.012016000350316365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,8192,0.009834667046864826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,5120,0.008276266853014629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,7168,0.009125333031018574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,6144,0.008262399832407634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,4096,0.007456000149250031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,3584,0.007316266496976216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,3072,0.006737066805362702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,2560,0.006030933558940887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,65536,0.03719253142674764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,2048,0.005414400001366933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,768,0.0036831999818483984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,1536,0.0046965335806210835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,1024,0.004006399959325791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,512,0.003289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,256,0.0030602666238943735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,128,0.0028394666810830434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,64,0.0027061333258946735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,1024,32,0.0027317332724730173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,10240,0.009416533509890239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,16384,0.011739733815193176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,12288,0.01018453339735667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,4096,0.006916266679763794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,8192,0.008884267012278239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,7168,0.008276266853014629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,6144,0.007657599945863088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,5120,0.007346133391062419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,2560,0.006031999985376993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,65536,0.03091946641604106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,3584,0.007021866738796234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,2048,0.005374933282534281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,3072,0.006577066580454509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,1536,0.004678399860858917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,1024,0.004013866682847341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,768,0.003668266783157984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,512,0.003365333378314972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,256,0.0030613332986831666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,64,0.0026869334280490874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,128,0.002825599908828735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,10240,0.008410666386286418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,768,32,0.002678400029738744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,16384,0.009920000036557516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,12288,0.00885759989420573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,5120,0.00680213322242101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,8192,0.00774186650911967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,7168,0.00701333334048589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,6144,0.006602666775385539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,65536,0.022539732853571574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,4096,0.006467199822266896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,3584,0.00687360018491745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,3072,0.006528000036875407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,2560,0.006027733286221823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,2048,0.005376000205675761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,1536,0.004676266511281332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,1024,0.0039658665657043455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,768,0.0036447999378045404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,128,0.0028245332340399425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,64,0.002679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,256,0.0029546665648619336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,512,0.003286399940649668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,512,32,0.002738133321205775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,16384,0.007282133400440216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,12288,0.006822399795055389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,8192,0.006698666512966156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,10240,0.006720000008742015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,5120,0.00684799998998642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,65536,0.014337066809336343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,7168,0.0064735998709996535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,6144,0.006402133405208588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,4096,0.006344533463319142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,3072,0.006494933366775512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,3584,0.006758399804433187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,2048,0.005369600156943003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,1536,0.004613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,2560,0.006002133091290792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,1024,0.003961600114901861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,768,0.0035989334185918174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,256,0.0030154667794704436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,64,0.0027583998938401537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,512,0.00325546662012736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,128,0.0028213332096735638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,256,32,0.002639999985694885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,65536,0.011481600006421407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,12288,0.006570666531721751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,8192,0.006540800134340922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,16384,0.006537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,10240,0.0065749332308769224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,7168,0.006297599772612255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,6144,0.006232533355553945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,5120,0.006504533191521962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,3072,0.006321066617965698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,4096,0.006116266548633576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,2048,0.005142400165398916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,2560,0.005763199925422668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,3584,0.0065760001540184024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,1536,0.004503466685612996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,512,0.0033098667860031127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,768,0.0035189333061377203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,1024,0.003868799904982249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,128,0.00275093341867129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,256,0.0029205332199732465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,64,0.0025813333690166474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,128,32,0.0026357332865397137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,16384,0.006467199822266896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,12288,0.006198399762312571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,8192,0.006251733501752217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,65536,0.00905386706193288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,10240,0.006405333181222279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,7168,0.006161066889762879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,6144,0.006000000238418579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,4096,0.006141866743564606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,5120,0.006382933259010315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,3584,0.006451199948787689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,3072,0.006111999849478403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,1536,0.004442666471004486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,768,0.0034901333351929987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,2560,0.005645866692066193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,2048,0.005047466854254405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,1024,0.003773866593837738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,256,0.0028277332584063213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,128,0.002754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,512,0.003134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,32,0.002657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,64,64,0.0025962665677070618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,16384,0.00654720018307368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,65536,0.008869333068529765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,12288,0.0065087998906771345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,10240,0.006227200229962667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,8192,0.006239999830722809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,6144,0.006002133091290792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,7168,0.006080000102519989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,5120,0.006342400113741558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,2560,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,1536,0.004376533130804697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,4096,0.005955199897289276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,2048,0.005017599960168203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,3584,0.006398933132489522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,3072,0.006060799956321717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,1024,0.0037610667447249093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,768,0.0034741332133611047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,256,0.0029109333952267963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,512,0.003134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,64,0.0025589334468046824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,128,0.002703999976317088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,16,32,32,0.0025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,4096,0.13385599454243977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,5120,0.165774933497111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,6144,0.1973098595937093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,7168,0.22963306109110512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,8192,0.2657973289489746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,2560,0.09191466967264811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,3584,0.1162336031595866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,3072,0.1010485331217448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,1536,0.058667735258738196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,2048,0.07789333661397299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,1024,0.03966079950332642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,768,0.030789333581924438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,512,0.021857066949208578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,10240,0.3266175905863444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,64,0.004530133306980133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,128,0.0073088000218073535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,256,0.01112000048160553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,32,0.004306133091449738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,12288,0.39089492162068684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,8192,0.06524906555811563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,7168,0.05666346549987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,10240,0.0798911968866984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,12288,0.09460053443908692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,6144,0.05547840197881063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,4096,0.03444693485895793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,5120,0.042327467600504556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,3072,0.02954346736272176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,16384,0.1404746691385905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,3584,0.030821333328882854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,512,0.006940799951553345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,1024,0.010823466380437215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,768,0.009851732850074768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,1536,0.015130666891733804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,2048,0.018823466698328652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,2560,0.02270080049832662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,128,0.0033429334561030067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,256,0.0038442666331926978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,32,0.0030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,64,0.003102933367093404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,65536,16384,0.5241930643717448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,8192,0.05363093217213949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,6144,0.04553920030593872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,7168,0.047518932819366456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,10240,0.06713813145955404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,12288,0.07940159638722738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,4096,0.028437334299087524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,5120,0.035000534852345784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,3584,0.02521066665649414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,2560,0.02020373344421387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,16384,0.10548160076141358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,3072,0.022021333376566567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,768,0.007751466830571492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,1024,0.009288533528645834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,256,0.0036960000793139136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,512,0.006466133395830791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,1536,0.012598400314648947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,2048,0.015848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,128,0.0032277333239714304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,64,0.002976000060637792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,32,0.0030602666238943735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,10240,0.059165867169698086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,7168,0.03857920169830322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,8192,0.0482101321220398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,12288,0.06889599959055583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,16384,0.09049920241038004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,6144,0.033676799138387045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,4096,0.023526400327682495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,5120,0.028655999898910524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,3584,0.02092693249384562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,3072,0.02044586737950643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,1536,0.010620799660682679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,1024,0.008111999928951263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,2560,0.01590826710065206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,2048,0.013172266880671182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,768,0.007132799923419952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,512,0.005539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,128,0.0032416000962257386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,256,0.003437866767247518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,64,0.0028927999238173166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,32,0.0030005333324273427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,8192,0.03455466826756795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,10240,0.041495466232299806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,12288,0.052093867460886636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,16384,65536,0.54651304880778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,16384,0.06759146849314371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,7168,0.030321067571640013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,6144,0.026375466585159303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,5120,0.02270080049832662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,3584,0.01690666675567627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,4096,0.020874667167663574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,1536,0.008996267120043437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,3072,0.015042133132616677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,12288,65536,0.40065708160400393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,1024,0.007046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,768,0.006076799829800924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,2048,0.010869333148002624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,2560,0.013052800297737121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,512,0.004106666644414266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,64,0.0029994666576385496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,256,0.003382399926582972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,128,0.0031658666829268134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,32,0.002919466545184453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,7168,0.03006826639175415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,12288,0.04688426653544108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,8192,0.03341333468755086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,10240,65536,0.37250134150187175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,10240,0.04031573136647542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,16384,0.06601920127868652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,3072,0.01601920028527578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,4096,0.019716266791025797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,5120,0.023281067609786987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,6144,0.026434133450190227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,3584,0.018942934274673463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,2560,0.012656000256538392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,256,0.003291733314593633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,1024,0.006668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,2048,0.010105599959691364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,768,0.005772800246874491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,1536,0.0085098663965861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,512,0.0037205333511034647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,128,0.0031189332405726117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,64,0.002887466549873352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,32,0.002885333448648453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,8192,65536,0.26570560137430826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,8192,0.02752959926923116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,7168,0.024524799982706704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,10240,0.03398186763127645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,12288,0.03959146738052368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,6144,0.02368853290875753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,16384,0.048740267753601074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,3584,0.013843199610710144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,5120,0.01874666611353556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,4096,0.015672533710797628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,2560,0.010852266351381938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,3072,0.012291199962298075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,2048,0.009087999661763508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,1536,0.007895466685295106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,1024,0.006252799928188324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,512,0.0036458666125933326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,768,0.0051807999610900875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,256,0.0032426667710145317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,64,0.0028949332733949023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,128,0.0030623999734719592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,32,0.002885333448648453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,7168,65536,0.23212587038675941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,7168,0.02437439958254496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,10240,0.03234133323033651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,8192,0.02645333409309387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,12288,0.03766506512959798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,16384,0.048647467295328775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,6144,0.019897600015004478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,5120,0.01578879952430725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,4096,0.0137855996688207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,3072,0.010628267129262289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,3584,0.011868799726168316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,1536,0.007005866865317028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,2048,0.008231466511885326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,2560,0.009417600433031718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,256,0.003159466634194056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,1024,0.005735466877619425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,768,0.0040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,512,0.0034495999415715536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,128,0.002961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,32,0.002828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,64,0.002811733384927114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,6144,65536,0.19721387227376302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,8192,0.02218773365020752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,10240,0.026605866352717084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,7168,0.020148267348607383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,12288,0.02955519954363505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,6144,0.019393066565195717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,16384,0.03941973447799683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,5120,0.015357866883277893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,3584,0.01218666632970174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,1536,0.006221866607666016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,4096,0.013286399841308593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,3072,0.011148800452550251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,2560,0.00827519992987315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,2048,0.007330133517583211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,768,0.003921066721280416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,1024,0.0044714664419492085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,512,0.0034186666210492453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,5120,65536,0.18536853790283203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,256,0.0031466667850812277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,128,0.0028607999285062153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,32,0.002780800064404805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,64,0.0027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,7168,0.017697066068649292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,6144,0.01565439999103546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,8192,0.01948053240776062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,10240,0.02295680046081543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,12288,0.028520532449086505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,16384,0.03319680094718933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,5120,0.014111999670664468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,4096,0.012406399846076966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,2560,0.007973333199818928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,3584,0.01157866617043813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,1536,0.006007466713587443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,4096,65536,0.1455285390218099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,3072,0.010678399602572124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,2048,0.006955733398596446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,1024,0.004159999887148539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,256,0.003134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,768,0.003705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,128,0.002996266633272171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,512,0.003366400053103765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,64,0.0027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,32,0.0027434666951497394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,10240,0.02102186679840088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,6144,0.014663466811180114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,8192,0.01798186699549357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,7168,0.015837867061297098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,12288,0.023877332607905068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,16384,0.0299615999062856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,5120,0.01306880017121633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,4096,0.011653332908948263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,1536,0.005730133255322774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,2560,0.007546666761239369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,3584,0.010781866312026978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3584,65536,0.12386879920959473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,3072,0.009779199957847595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,2048,0.006717866659164429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,768,0.003706666578849157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,1024,0.004060799876848856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,512,0.0033258666594823206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,128,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,32,0.002796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,256,0.003070933371782303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,64,0.0027200000981489818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,8192,0.0156768004099528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,6144,0.012746666868527731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,10240,0.01841493248939514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,7168,0.0144896000623703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,12288,0.021230934063593547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,5120,0.01162559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,16384,0.027077333132425947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,3072,65536,0.1044266700744629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,4096,0.010452266534169514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,2048,0.006526933113733928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,3584,0.00974720021088918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,2560,0.007159466544787089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,3072,0.00920746624469757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,1536,0.0048213332891464235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,512,0.003339733431736628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,1024,0.004067199925581614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,768,0.0036746665835380556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,256,0.003019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,128,0.0028181334336598715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,32,0.0028618666032950084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,64,0.0026464000344276427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,7168,0.01255466639995575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,6144,0.011554132898648579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2560,65536,0.09683840274810791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,8192,0.01368106702963511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,10240,0.01572160025437673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,12288,0.01761173407236735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,16384,0.022156800826390585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,3584,0.009291733304659527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,5120,0.010593066612879436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,4096,0.009590400258700053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,3072,0.008402132987976074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,2560,0.007005866865317028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,1024,0.003970133264859518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,2048,0.005763199925422668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,1536,0.0046186665693918865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,768,0.00363520011305809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,64,0.0027327999472618104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,256,0.0029813334345817565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,512,0.003278933217128118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,128,0.0028149334092934927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,65536,0.07042666276295981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,2048,32,0.0026943999032179515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,6144,0.00988159974416097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,10240,0.012930132945378623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,8192,0.011479467153549194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,7168,0.010629333058993022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,12288,0.014443733294804893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,5120,0.009097599983215332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,16384,0.017948800325393678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,4096,0.008563199639320373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,3584,0.008291199803352356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,3072,0.007659733295440674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,2560,0.005975466469923655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,768,0.00360959991812706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,2048,0.0054293334484100345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,1536,0.004645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,1024,0.003953066716591517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,512,0.003268266717592875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,64,0.0026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,65536,0.054656000932057705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,32,0.0028042666614055633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,256,0.0029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1536,128,0.0027989332874615988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,12288,0.011865599950154623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,10240,0.010919466614723206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,7168,0.009301333626111349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,16384,0.013963733116785684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,8192,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,6144,0.008132266501585644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,5120,0.008072533210118612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,4096,0.00699839989344279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,3584,0.007092266778151194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,3072,0.0067114666104316715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,2560,0.005986133217811584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,1536,0.004636799792448679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,2048,0.005291733145713806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,1024,0.003960533440113068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,65536,0.037963732083638506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,256,0.003035733352104823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,768,0.0036277333895365395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,512,0.0032618666688601174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,128,0.002829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,64,0.002674133330583572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,12288,0.009962667028109233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,1024,32,0.0026591998835404714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,8192,0.008665600419044494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,10240,0.009372799595197042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,16384,0.011403733491897583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,5120,0.007286400099595388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,7168,0.008102400104204814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,3584,0.0067775999506314594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,4096,0.006557866434256236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,6144,0.007523199915885926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,3072,0.006494933366775512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,2560,0.007798400024573009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,65536,0.0304202675819397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,2048,0.0067210664351781205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,1024,0.004576000074545542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,1536,0.005794133245944977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,768,0.004266666869322458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,256,0.0032170665760835014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,512,0.003703466554482778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,32,0.0027776000400384264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,128,0.002979200085004171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,768,64,0.0026496000587940215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,12288,0.00860479970773061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,10240,0.008340266346931458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,7168,0.006963199873765309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,16384,0.009845333298047383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,8192,0.007334400216738384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,6144,0.00647680014371872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,65536,0.02265066703160604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,5120,0.006753066678841908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,3072,0.006559999783833821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,3584,0.00681386689345042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,4096,0.006321066617965698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,2048,0.005357866485913595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,2560,0.006049066781997681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,1024,0.003993600110212962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,512,0.0033461332321166994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,1536,0.004695466657479604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,768,0.0036469332873821257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,256,0.003053866575161616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,128,0.0028512001037597655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,32,0.0027488000690937043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,512,64,0.0027274665733178455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,16384,0.007351466516653697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,12288,0.006635733445485433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,65536,0.014250666896502177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,8192,0.006423466900984447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,10240,0.006583466629187266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,4096,0.00631039987007777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,7168,0.006362666686375936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,6144,0.006277333199977875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,5120,0.006640000144640605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,1536,0.0046186665693918865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,3072,0.0063701331615448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,2560,0.005931733548641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,3584,0.0066442668437957765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,2048,0.005264000097910563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,768,0.0035584000249703727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,1024,0.003923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,256,0.003011200080315272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,512,0.0032298666735490165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,128,0.002773333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,64,0.00264533335963885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,256,32,0.0026677332818508146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,16384,0.006371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,10240,0.006539733211199443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,65536,0.011262933413187664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,12288,0.006390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,7168,0.0061951999862988796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,8192,0.006286933521429698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,6144,0.006001066664854685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,4096,0.006021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,3584,0.006558933357397716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,5120,0.006455466647942861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,2048,0.005272533496220907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,3072,0.006098133325576782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,2560,0.005959466596444448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,1024,0.003928533444801966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,1536,0.004604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,512,0.0033482665816942847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,768,0.003565866748491923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,128,0.0027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,256,0.0029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,32,0.002628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,128,64,0.0025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,65536,0.008360532919565837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,8192,0.006151466568311056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,16384,0.006247466802597046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,12288,0.006450133522351582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,10240,0.006274133423964183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,7168,0.006021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,6144,0.005884799857934316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,3584,0.006362666686375936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,5120,0.006298666695753734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,4096,0.005890133480230967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,2560,0.005611733098824819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,2048,0.004951466619968414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,3072,0.006065066655476888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,768,0.003419733295838038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,1536,0.004330666859944662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,128,0.0027104000250498454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,512,0.0030954666435718536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,1024,0.0036821333070596062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,256,0.0027690666417280836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,64,0.0025813333690166474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,64,32,0.0025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,65536,0.008108800152937572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,12288,0.006142933170000712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,16384,0.006309333443641663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,10240,0.00618453323841095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,7168,0.005904000004132589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,6144,0.005876266459623972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,8192,0.006100266675154368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,5120,0.0062730665008227035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,3584,0.006326400240262349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,4096,0.0058783998092015585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,2560,0.005891199906667074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,2048,0.005203199883302053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,3072,0.006029866635799408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,1536,0.004577066500981649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,768,0.0033930666744709016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,1024,0.003685333331425985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,512,0.003202133377393087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,256,0.002833066632350286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,128,0.0026357332865397137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,64,0.0024821333587169646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,8,32,32,0.0025546667476495106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,4096,0.1334858735402425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,5120,0.16539306640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,6144,0.19691413243611652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,7168,0.22955093383789063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,8192,0.26517546971639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,3584,0.11605546474456788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,2048,0.0773973306020101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,3072,0.10087573528289795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,1536,0.05870293378829956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,2560,0.08763306935628255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,512,0.021613866090774536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,768,0.02781333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,128,0.007977599898974102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,256,0.011308800180753071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,10240,0.32630081176757814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,1024,0.04002559979756673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,64,0.004587733248869578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,32,0.0042698666453361515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,12288,0.3914911905924479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,7168,0.056553598244984946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,8192,0.06410239934921265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,10240,0.07966720263163249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,12288,0.10701653162638347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,6144,0.055638400713602695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,5120,0.041611735026041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,4096,0.03378239870071411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,16384,0.12816853523254396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,3584,0.030082132418950396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,2048,0.018649599949518838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,3072,0.02757973273595174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,1024,0.012000000476837159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,2560,0.022541866699854533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,1536,0.01492586632569631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,512,0.006902400155862172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,768,0.008906666437784832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,128,0.003435733417669932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,256,0.0040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,64,0.0030421334008375804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,32,0.0030400000512599947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,65536,16384,0.5230005264282227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,6144,0.04594666560490926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,7168,0.04748266537984212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,8192,0.05424319903055826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,10240,0.06730986436208089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,12288,0.07989760239919028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,3584,0.02536533276240031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,2560,0.018926932414372762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,4096,0.027966932455698652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,5120,0.035121067365010576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,16384,0.09718080361684164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,3072,0.021993599335352578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,1536,0.012516267100969949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,1024,0.009238400061925252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,768,0.007694933315118153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,256,0.003737599899371465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,512,0.006454400221506755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,2048,0.015796266992886863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,128,0.0032640000184377036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,64,0.002998399982849757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,32,0.002979200085004171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,7168,0.038525867462158206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,10240,0.05848426818847656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,8192,0.04761279821395874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,12288,0.0685802698135376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,5120,0.030242133140563964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,16384,0.08872746626536052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,4096,0.02342080076535543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,6144,0.0333898663520813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,3584,0.020875734090805054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,3072,0.019261866807937622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,1536,0.01072746713956197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,1024,0.00805866668621699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,2048,0.013082666198412576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,2560,0.017004799842834473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,768,0.007124266525109608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,256,0.003505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,512,0.0054517333706219995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,32,0.0029834667841593427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,128,0.0031413334111372627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,64,0.002961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,8192,0.03460479974746704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,10240,0.0421120007832845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,12288,0.05292799870173136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,16384,0.06726826826731364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,16384,65536,0.5402975718180338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,12288,65536,0.3971776008605957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,5120,0.023176532983779908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,6144,0.02621440092722575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,7168,0.030393600463867188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,3584,0.018524799744288126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,2560,0.01299626628557841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,4096,0.019115734100341796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,1536,0.009013332923253377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,3072,0.01646080017089844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,2048,0.010925867160161336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,768,0.006106666723887126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,512,0.00413973331451416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,256,0.00345920001467069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,1024,0.00699839989344279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,128,0.003155199935038885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,64,0.002868266652027766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,32,0.0030016000072161358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,7168,0.03214826583862305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,8192,0.03327893416086833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,10240,0.04010026852289836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,10240,65536,0.37167574564615885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,12288,0.04653333425521851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,16384,0.05989120006561279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,4096,0.019684267044067384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,6144,0.02820906639099121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,5120,0.022835199038187662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,2048,0.010194133718808491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,3584,0.018997333447138467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,3072,0.015922133127848306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,1536,0.008654933174451191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,2560,0.01211840013662974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,1024,0.006631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,768,0.005783466498057047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,512,0.00391146664818128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,128,0.003092266619205475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,256,0.003324799984693527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,32,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,64,0.0028586665789286296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,8192,65536,0.26370132764180504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,7168,0.02358506719271342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,12288,0.04178986549377441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,8192,0.02723520000775655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,10240,0.035258666674296064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,6144,0.024732800324757893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,16384,0.05028479894002279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,4096,0.01607146660486857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,5120,0.017829332749048868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,3584,0.013800533612569174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,3072,0.012950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,1536,0.00804373323917389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,2560,0.011355732878049214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,2048,0.00942080020904541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,256,0.0033130665620168054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,1024,0.006212266782919565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,768,0.005005866785844167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,512,0.003773866593837738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,128,0.0030037333567937215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,64,0.0027615999182065325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,32,0.002899199972550074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,7168,65536,0.2299957275390625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,7168,0.021540266275405884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,8192,0.026683733860651655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,10240,0.031853866577148435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,12288,0.037265066305796304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,16384,0.04844906727472941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,5120,0.01635199983914693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,6144,0.01994346578915914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,3072,0.010761599739392598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,4096,0.01337493360042572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,2048,0.008406399687131246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,3584,0.011962667107582092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,2560,0.009399466713269551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,512,0.003585066646337509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,1536,0.007052800059318543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,1024,0.005682133138179779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,768,0.004185600082079569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,256,0.0032255999743938447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,128,0.0029866665601730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,6144,65536,0.19680213928222656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,64,0.0027744000156720476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,32,0.0027647999425729113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,7168,0.020182400941848755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,8192,0.023537067572275798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,6144,0.017208532492319743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,10240,0.026282666126887004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,12288,0.030503465731938677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,16384,0.03877546787261963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,5120,0.01535360018412272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,4096,0.013370666901270548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,3072,0.011245866616566975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,3584,0.012241066495577494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,2560,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,2048,0.007319466769695282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,1536,0.006236800054709116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,1024,0.004569600025812784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,768,0.003739733248949051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,256,0.003083733220895131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,512,0.003442133218050003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,128,0.0028885332246621448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,32,0.002776533365249634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,64,0.0028160000840822858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,5120,65536,0.18154452641805013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,7168,0.01827626625696818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,8192,0.019320533672968546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,12288,0.026280534267425538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,6144,0.01690666675567627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,10240,0.02265920042991638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,16384,0.03309546709060669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,4096,0.01244160036245982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,3584,0.011452800035476685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,5120,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,2048,0.006914133330186208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,3072,0.010729599992434185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,2560,0.007912533481915791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,1536,0.006039466460545858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,4096,65536,0.1327466646830241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,64,0.0027637332677841187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,1024,0.004119466741879781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,768,0.003765333443880081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,512,0.0034048000971476236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,256,0.003190399954716364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,128,0.002872533351182938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,32,0.0027274665733178455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,12288,0.024539732933044435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,6144,0.015014400084813436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,10240,0.021610667308171592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,7168,0.016756266355514526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,8192,0.017542399962743125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,16384,0.03098133405049642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,5120,0.01328426698843638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,4096,0.011707733074824016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3584,65536,0.1221226692199707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,3584,0.01092906693617503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,3072,0.010027733445167542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,2560,0.007561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,1536,0.005736533304055532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,2048,0.006676266590754191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,64,0.0027456000447273255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,512,0.003401600072781245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,256,0.0031221332649389905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,1024,0.004065066576004028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,768,0.003737599899371465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,128,0.002880000074704488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,32,0.002784000088771184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,6144,0.012814933061599731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,7168,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,8192,0.015494400262832641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,10240,0.018338133891423546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,12288,0.021066667636235555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,16384,0.02682773272196452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,3072,65536,0.10411626497904461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,5120,0.01164906620979309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,1536,0.004966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,4096,0.01042560040950775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,2560,0.0071839998165766404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,3584,0.009668266773223877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,3072,0.009106133381525676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,2048,0.006362666686375936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,1024,0.004031999905904134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,768,0.003669333209594091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,256,0.0030826665461063385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,512,0.003345066557327906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,128,0.002869333326816559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,64,0.0027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,32,0.0027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,16384,0.021794132391611733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,7168,0.012844799955685934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2560,65536,0.09647253354390463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,6144,0.011834667126337687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,12288,0.018130133549372353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,8192,0.013548800349235534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,10240,0.015339733163515726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,5120,0.010670933127403259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,3584,0.009257599711418152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,4096,0.009641599655151368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,3072,0.008239999910195668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,2560,0.006981333096822103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,1024,0.003945599993069967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,2048,0.0056639999151229855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,1536,0.004650666813055674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,768,0.0036373332142829893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,64,0.0026687999566396077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,512,0.0033088001112143196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,256,0.002985599885384242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,128,0.002775466690460841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,32,0.002681600054105123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,2048,65536,0.06765013535817464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,16384,0.01806400020917257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,10240,0.013273599743843078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,12288,0.014288000265757241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,6144,0.009875200192133586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,8192,0.011558399597803751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,7168,0.01072746713956197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,5120,0.009230933586756388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,4096,0.008436266581217449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,3072,0.007387733459472657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,3584,0.008263466755549113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,2560,0.00844693382581075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,2048,0.006986666719118755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,768,0.003667200108369192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,1536,0.0064650664726893115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,1024,0.0039381332695484165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,512,0.0033088001112143196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,32,0.00277866671482722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,65536,0.054169599215189615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,256,0.0029909332593282064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,128,0.0028138667345046996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1536,64,0.0026677332818508146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,10240,0.011001599828402202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,12288,0.011767466862996418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,16384,0.014260266224543253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,8192,0.009669333696365356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,7168,0.00899733304977417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,6144,0.008110933502515157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,5120,0.008020266890525818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,4096,0.0072522665063540145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,3584,0.007129600147406261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,3072,0.0065536002318064375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,2560,0.005973333120346069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,768,0.0036085332433382668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,2048,0.005308799942334493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,65536,0.039789867401123044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,1536,0.004611200094223023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,1024,0.003967999915281932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,512,0.003307733436425527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,256,0.0029546665648619336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,64,0.0027882667879263563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,128,0.0028021333118279776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,1024,32,0.0027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,16384,0.011428266763687134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,10240,0.00931946635246277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,12288,0.009970133503278095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,8192,0.008753066261609394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,7168,0.00810346653064092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,4096,0.00633493314186732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,6144,0.007433600227038066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,3584,0.006763733426729838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,5120,0.007493333518505096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,3072,0.006442666550477346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,2048,0.006625066697597504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,65536,0.029922133684158324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,2560,0.00774186650911967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,1536,0.00625493327776591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,128,0.0029898665845394133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,768,0.004334933559099833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,512,0.0036992001036802924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,1024,0.00476800004641215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,256,0.003201066702604294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,32,0.002834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,768,64,0.0027583998938401537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,16384,0.009892266988754273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,12288,0.008732799688975017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,6144,0.006295466423034668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,7168,0.006886399785677592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,10240,0.008203733464082081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,8192,0.007246933380762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,4096,0.0062837332487106325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,5120,0.006739200154940288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,3584,0.006740266581376393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,65536,0.021989333629608154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,1024,0.004016000032424927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,3072,0.006402133405208588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,2560,0.00600853314002355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,2048,0.005331199864546458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,1536,0.004862933357556661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,768,0.003643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,128,0.0030165334542592366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,512,0.003294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,256,0.003091199944416682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,64,0.0027306665976842242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,512,32,0.0026880001028378804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,10240,0.00668693333864212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,16384,0.007457066575686138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,12288,0.006696533163388569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,8192,0.006445866823196411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,65536,0.014141866564750671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,4096,0.006986666719118755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,6144,0.006259199976921081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,5120,0.008108800152937572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,7168,0.006320000191529592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,1536,0.004635733366012573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,3584,0.006682666639486949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,3072,0.0063274666666984555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,2560,0.005909333129723867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,768,0.003587199995915095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,2048,0.00526506652434667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,512,0.0031968000034491217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,256,0.003009066730737686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,1024,0.0041002665956815084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,128,0.002775466690460841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,32,0.002628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,10240,0.007948799928029378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,256,64,0.0026079999903837843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,65536,0.011026133100191753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,16384,0.006347733239332835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,12288,0.006265600025653839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,5120,0.00661653329928716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,8192,0.007204266885916392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,6144,0.006205866734186808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,7168,0.006548266609509785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,3584,0.006668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,4096,0.006232533355553945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,3072,0.006359466910362243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,2048,0.0053045332431793215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,2560,0.00594346672296524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,1024,0.003910399973392487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,512,0.003218133250872294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,768,0.0035445332527160645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,1536,0.004587733248869578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,256,0.002846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,64,0.0026880001028378804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,128,0.0027818667391935987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,128,32,0.002595199892918269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,7168,0.006250666578610738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,65536,0.00849173367023468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,16384,0.007090133428573608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,12288,0.006384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,8192,0.006366933385531108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,10240,0.006487466891606649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,5120,0.0065984000762303666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,4096,0.006090666850407918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,6144,0.0061152001221974695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,3584,0.0067114666104316715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,3072,0.006351999938488007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,2560,0.005888000130653381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,2048,0.004923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,768,0.003421866645415624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,1024,0.003667200108369192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,256,0.0028266665836175283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,1536,0.004342400034268697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,512,0.0030794667700926462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,64,0.002571733295917511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,128,0.002648533384005229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,65536,0.007779199878374736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,64,32,0.002567466596762339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,12288,0.0064074665307998655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,16384,0.006281599899133046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,8192,0.006454400221506755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,7168,0.006295466423034668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,10240,0.0063509335120519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,6144,0.006098133325576782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,3584,0.0065760001540184024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,5120,0.006437333424886067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,4096,0.006064000229040781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,3072,0.006164266665776571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,2048,0.005205333232879639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,768,0.0035125332574049628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,2560,0.005886933207511902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,512,0.003289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,1024,0.0038634667793909705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,1536,0.004556799928347269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,256,0.0028575999041398365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,32,0.00258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,64,0.0025162667036056517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,4,32,128,0.002566399921973546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,4096,0.13309653600056964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,5120,0.1652234713236491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,6144,0.19684799512227374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,7168,0.22882026036580405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,8192,0.2622549374898275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,3584,0.11554346879323323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,2560,0.09153599739074707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,3072,0.10026026566823323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,768,0.030869332949320476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,1024,0.04025919834772746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,256,0.01106346646944682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,10240,0.32659734090169273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,512,0.021657600005467733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,2048,0.07706133524576822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,1536,0.058538667360941564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,64,0.004308266441027323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,128,0.007231999933719635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,32,0.004273066421349844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,12288,0.39725119272867837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,7168,0.056484266122182214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,8192,0.06383253335952759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,10240,0.0798911968866984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,6144,0.05620799859364828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,12288,0.09401919841766357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,3072,0.026659200588862102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,3584,0.02986026604970296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,4096,0.03351039886474609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,5120,0.041621331373850504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,16384,0.12452267011006672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,2560,0.025250132878621417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,768,0.009805867075920105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,512,0.007514666517575581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,2048,0.01872746745745341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,1536,0.014811733365058899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,1024,0.010825600226720173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,256,0.00408746674656868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,32,0.003138133386770884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,128,0.0033674667278925574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,64,0.0030773334205150605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,65536,16384,0.5214751879374186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,7168,0.04844053188959758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,8192,0.05456639925638834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,6144,0.04602133433024089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,10240,0.06891199747721354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,12288,0.08065600395202636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,3072,0.022048000494639078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,3584,0.025435733795166015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,16384,0.10401279926300049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,4096,0.028130133946736652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,5120,0.03519893487294515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,2560,0.020351999998092653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,2048,0.01588373382886251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,1024,0.010095999638239542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,768,0.007785599927107494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,1536,0.012717866897583007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,128,0.0032469332218170166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,512,0.006131199995676676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,256,0.0037589333951473236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,64,0.003018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,32,0.0029386666913827257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,7168,0.03869013388951619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,8192,0.04757973353068034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,10240,0.0580725351969401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,12288,0.06846079826354981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,16384,0.09002453486124674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,4096,0.02355733315149943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,5120,0.02788693308830261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,6144,0.03337173461914063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,3584,0.020994132757186888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,3072,0.020594133933385213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,768,0.007123200098673503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,1536,0.010616532961527507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,1024,0.00808426688114802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,2560,0.01589120030403137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,2048,0.013040000200271606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,512,0.005674666663010915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,256,0.003570133447647095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,128,0.0031328000128269195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,64,0.002902399996916453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,32,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,8192,0.03388160069783529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,10240,0.041774932543436685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,16384,65536,0.5425877253214518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,12288,0.051668266455332436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,12288,65536,0.3983231862386068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,16384,0.06670933564503988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,7168,0.02993920048077901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,3584,0.018563199043273925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,5120,0.025220266977945965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,6144,0.026387200752894087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,4096,0.018922666708628334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,3072,0.014909866452217101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,2048,0.010941867033640544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,1536,0.009105066458384197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,1024,0.0071829333901405334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,768,0.0060703997810681665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,512,0.004343466460704803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,2560,0.012942933042844138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,256,0.0035189333061377203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,128,0.0031498665610949195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,64,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,32,0.0028319999575614927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,12288,0.045878398418426516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,7168,0.029721599817276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,8192,0.03322666684786479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,10240,65536,0.3747178713480631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,10240,0.0400000015894572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,16384,0.05946026643117269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,6144,0.025974400838216144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,3584,0.01889066696166992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,3072,0.01577173372109731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,5120,0.022922666867574056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,4096,0.019636267423629762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,2048,0.010177066922187806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,2560,0.01179200013478597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,256,0.0033973333736260734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,1024,0.0067221333583196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,768,0.005793066819508871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,1536,0.008501332998275758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,512,0.004162133236726125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,128,0.0031178665657838186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,64,0.002887466549873352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,32,0.002855466554562251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,8192,65536,0.2847061475118001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,8192,0.027027199665705364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,7168,0.02370133399963379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,6144,0.025396267573038738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,12288,0.04285973310470581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,10240,0.03333546717961629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,16384,0.05702826579411825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,3584,0.014842666188875833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,5120,0.018314667542775474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,4096,0.01529813309510549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,2560,0.010731732845306397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,3072,0.013110400239626566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,1536,0.008107733229796093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,2048,0.009878399968147277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,1024,0.006316799918810527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,128,0.0030378667016824085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,512,0.003899733225504557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,768,0.005107200145721436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,256,0.003349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,64,0.002850133428970973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,32,0.0028362666567166646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,7168,65536,0.22992854118347167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,7168,0.020475733280181884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,10240,0.03168320059776306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,8192,0.026601600646972656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,12288,0.03653119802474976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,16384,0.04796266555786133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,5120,0.015664000312487283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,6144,0.0202890674273173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,4096,0.013544533650080362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,3072,0.010831999778747558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,2048,0.008224000036716462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,3584,0.012140799562136333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,2560,0.00937066674232483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,1536,0.007079466680685679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,512,0.0036629334092140196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,1024,0.0056991999348004665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,768,0.004244266450405121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,256,0.00329066663980484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,32,0.0027744000156720476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,64,0.0028031999866167706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,128,0.003013333429892858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,6144,65536,0.19520533879597982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,10240,0.026064000527064007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,8192,0.021964800357818604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,7168,0.019833600521087645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,12288,0.030628265937169392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,6144,0.018524799744288126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,16384,0.036637866497039796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,5120,0.015197867155075073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,3584,0.012123733758926392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,3072,0.011083733042081196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,4096,0.013178666432698568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,1536,0.006257066627343495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,2560,0.008291199803352356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,2048,0.0072522665063540145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,1024,0.004465066889921824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,5120,65536,0.18390293121337892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,256,0.0031008000175158186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,64,0.00275093341867129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,768,0.0038101332883040107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,128,0.0029365333418051405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,512,0.003505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,32,0.002752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,16384,0.03282559911410014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,7168,0.017359999815622966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,6144,0.015533866484959922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,8192,0.01927679975827535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,12288,0.026012800137201947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,10240,0.02258560061454773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,5120,0.014132266243298849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,3584,0.011502933502197266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,4096,0.01251520017782847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,4096,65536,0.13185919920603434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,3072,0.010670933127403259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,2560,0.00786240001519521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,2048,0.006888533135255177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,1536,0.006040533383687338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,1024,0.0042922665675481165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,768,0.003790933390458425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,256,0.0031082667410373688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,32,0.002752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,512,0.003479466587305069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,128,0.002885333448648453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,64,0.002737066646416982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,10240,0.020771199464797975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,6144,0.015126400192578635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,7168,0.01563093364238739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,8192,0.018398932615915933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,12288,0.02492799957593282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,5120,0.013060266772905985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,16384,0.031514666477839154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3584,65536,0.1257695992787679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,3584,0.010469333330790202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,4096,0.011806933085123698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,2048,0.006690133114655812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,3072,0.010214400291442872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,2560,0.007516799867153168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,1536,0.005570133527119955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,1024,0.004105599969625473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,256,0.003099733342727025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,768,0.003769599894682566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,512,0.0034314667185147605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,128,0.002974933385848999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,64,0.002712533374627431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,32,0.0027285332481066385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,16384,0.026364799340565997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,6144,0.012852266430854797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,8192,0.015734400351842245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,12288,0.02072640061378479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,10240,0.018284799655278523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,7168,0.014320000012715658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,5120,0.011530666550000509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,3072,65536,0.10320213635762532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,4096,0.01046720047791799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,1536,0.004924799998601278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,3584,0.009746133287747701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,3072,0.009084799885749817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,2560,0.007227733234564463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,2048,0.006477866570154827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,1024,0.004042666653792063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,512,0.0033791999022165934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,768,0.0037109332780043284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,128,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,256,0.0030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,64,0.0027200000981489818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,32,0.0027242665489514667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2560,65536,0.09608320395151773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,12288,0.018423465887705485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,16384,0.02190720041592916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,7168,0.013051733374595642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,6144,0.011849600076675414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,8192,0.01404800017674764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,10240,0.015284267067909241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,5120,0.011347200473149617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,3584,0.008769067128499348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,4096,0.009660800298055012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,3072,0.008328533172607422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,2560,0.007034666836261749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,2048,0.005662933488686879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,1536,0.004642133414745331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,768,0.0036138666172822317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,1024,0.003964799890915553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,512,0.0032927999893824257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,64,0.0026986666023731233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,32,0.0026922665536403658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,256,0.0029696000119050344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,128,0.00276799996693929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,2048,65536,0.06663039922714234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,16384,0.018348799149195353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,10240,0.013321600357691445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,6144,0.010016000270843506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,8192,0.011220266421635944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,7168,0.010757333040237427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,5120,0.009106133381525676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,4096,0.008566400408744812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,12288,0.014962133765220643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,3584,0.008263466755549113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,1024,0.0039989332358042395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,3072,0.00764160007238388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,2560,0.008390399813652038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,2048,0.007663999994595845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,1536,0.0063733334342638654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,768,0.0035914666950702667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,256,0.0030794667700926462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,65536,0.05403840144475301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,512,0.00327360009153684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,128,0.002794666588306427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,64,0.002674133330583572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1536,32,0.0027093333502610523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,12288,0.011970133582750956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,10240,0.010828800002733866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,16384,0.014355199535687766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,8192,0.00993173321088155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,4096,0.006925866504510244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,7168,0.008981333176294962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,5120,0.008026666442553202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,6144,0.008166400094827015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,2048,0.005287466446558634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,3584,0.006972800195217133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,3072,0.006445866823196411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,65536,0.038627199331919354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,2560,0.005980800092220307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,1536,0.004622933268547058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,1024,0.003933866570393244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,768,0.003605333218971888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,512,0.0033151999115943907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,256,0.002993066608905792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,128,0.0028277332584063213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,64,0.0026911998788515727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,1024,32,0.002746666719516118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,10240,0.009620267152786254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,16384,0.011910399794578553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,12288,0.009709866841634114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,8192,0.008641067147254943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,7168,0.007893333335717519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,4096,0.0066538666685422255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,6144,0.007246933380762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,5120,0.00717439999183019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,1536,0.006155733267466227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,2560,0.00840106705824534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,3584,0.006791466474533081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,3072,0.006431999802589417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,2048,0.007419733206431071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,65536,0.02967573404312134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,1024,0.004920533299446106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,512,0.0037418665985266366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,768,0.004294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,256,0.003205333401759466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,128,0.003018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,64,0.002773333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,768,32,0.002807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,12288,0.008550399541854858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,16384,0.00979306697845459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,8192,0.00703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,10240,0.008183466891447704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,4096,0.006277333199977875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,7168,0.006850133339564006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,6144,0.006296533346176148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,5120,0.006675200164318084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,65536,0.023161600033442177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,2048,0.005539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,3584,0.006696533163388569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,3072,0.006371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,2560,0.006233599781990051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,1536,0.0047872001926104225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,256,0.0030847998956839246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,1024,0.004014933357636133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,768,0.003643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,512,0.0033098667860031127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,64,0.002721066772937775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,128,0.002872533351182938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,16384,0.007490133245786031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,512,32,0.002726399898529053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,12288,0.006589866677920024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,10240,0.006537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,8192,0.0063967997829119366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,5120,0.007781333227952321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,7168,0.006322133541107178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,65536,0.014231466253598533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,6144,0.006196266909440359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,4096,0.00713919997215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,3584,0.006783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,2048,0.005306666592756907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,3072,0.006479999919732411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,2560,0.005930666625499725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,1536,0.004586666822433472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,768,0.0035402665535608927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,1024,0.003902933249870936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,128,0.0028277332584063213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,512,0.0032320000231266023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,64,0.002647466709216436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,256,0.0029130667448043824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,256,32,0.0026069333155949908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,65536,0.011078400413195293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,16384,0.006140799820423126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,10240,0.00821973333756129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,12288,0.006169599791367849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,8192,0.00720000018676122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,7168,0.006593066453933716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,6144,0.006222933530807495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,3072,0.00631039987007777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,5120,0.00664213349421819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,4096,0.006239999830722809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,3584,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,2560,0.00594346672296524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,768,0.003607466568549474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,1536,0.004584533472855886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,2048,0.00525439977645874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,1024,0.0038954667747020722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,256,0.002959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,512,0.0032000000278155005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,128,0.0027829334139823914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,32,0.0026719999810059865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,128,64,0.002585600068171819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,65536,0.00823040008544922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,10240,0.00664106657107671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,16384,0.00720000018676122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,8192,0.0065184002121289565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,12288,0.0064064001043637585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,6144,0.006135466694831848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,7168,0.0063701331615448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,5120,0.006618666648864746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,1536,0.004347733159859975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,3584,0.006705066561698914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,2560,0.005894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,4096,0.006166400015354156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,3072,0.006346666812896728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,2048,0.0049216002225875854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,1024,0.003671466559171677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,512,0.0031946666538715364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,768,0.0034122665723164878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,256,0.0028138667345046996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,128,0.0026261332134405774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,64,0.0025386666258176167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,64,32,0.0025888000925381976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,65536,0.008106666803359985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,12288,0.006175999840100607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,16384,0.006479999919732411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,10240,0.006409599880377452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,8192,0.006365866462389629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,7168,0.006185600161552429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,3584,0.006460799773534138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,6144,0.0061034664511680605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,5120,0.0065738668044408154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,4096,0.0060362666845321655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,2048,0.005226666728655497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,1536,0.004539733131726583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,3072,0.006121600170930227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,2560,0.005875200033187866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,1024,0.0038965334494908653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,768,0.0035082665582497918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,512,0.0031786667803923286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,128,0.00266239990790685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,256,0.002867199977238973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,64,0.0025087999800841015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,2,32,32,0.0024959998826185863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,4096,0.1330997308095296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,5120,0.16473813056945802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,6144,0.19617387453715007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,7168,0.22917332649230956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,8192,0.2617525259653727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,3584,0.11531413396199544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,2048,0.07745386759440104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,3072,0.10015786488850911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,2560,0.08710506757100424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,768,0.027449599901835126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,1024,0.03999040126800537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,512,0.021554134289423623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,256,0.012282666563987733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,128,0.007330133517583211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,10240,0.32599573135375975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,1536,0.05859306653340658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,64,0.004323199888070424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,32,0.004229333500067393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,12288,0.39569279352823894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,7168,0.05650560061136881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,6144,0.05615253448486328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,8192,0.06383893489837647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,10240,0.08002986907958984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,12288,0.10746026833852132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,4096,0.034381866455078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,16384,0.1252511978149414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,3584,0.02987946669260661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,5120,0.041368532180786136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,3072,0.026382933060328167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,2560,0.02418346603711446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,2048,0.01895573337872823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,1536,0.016446933150291443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,768,0.008938666184743245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,1024,0.010778666536013285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,512,0.006985599795977275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,256,0.003994666785001755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,128,0.003432533393303553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,64,0.0030720000465710956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,32,0.0030389333764712016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,65536,16384,0.5215210596720378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,8192,0.05465066830317179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,7168,0.04843306541442871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,6144,0.04603519837061564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,10240,0.06764159997304281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,12288,0.07835840384165446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,3072,0.022064000368118286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,5120,0.03548693259557088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,3584,0.025466666618982954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,4096,0.028562132517496747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,16384,0.10728000005086262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,2560,0.020403200387954713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,1024,0.009360000491142273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,768,0.007714133461316426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,1536,0.012745599945386252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,256,0.0037237333754698435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,512,0.0065749332308769224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,2048,0.015774933497111003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,32,0.0030122667551040648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,128,0.0032448001205921174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,64,0.0029098667204380036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,7168,0.03851199944814046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,10240,0.05778239965438843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,8192,0.047288533051808676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,12288,0.06859839757283528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,16384,0.08950506846110026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,3584,0.021901865800221763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,5120,0.028617600599924724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,4096,0.023577600717544556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,6144,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,1024,0.008057599763075511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,1536,0.010607999563217164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,2048,0.013194666306177775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,2560,0.016503467162450155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,3072,0.02013333241144816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,768,0.00688213308652242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,256,0.003549866626660029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,512,0.005566933254400889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,128,0.0031583999594052637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,64,0.002922666569550832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,32,0.0029098667204380036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,12288,0.05257493257522583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,10240,0.04254719813664754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,8192,0.03420053323109944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,16384,65536,0.541049575805664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,16384,0.06672746340433756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,7168,0.0310591995716095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,5120,0.022971733411153158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,6144,0.026610134045283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,4096,0.021082667509714763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,3584,0.017122133572896322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,12288,65536,0.4014133453369141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,3072,0.01516480048497518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,512,0.004524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,2560,0.013034666577974955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,2048,0.01114026705423991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,768,0.0065653334061304735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,1024,0.0072170664866765336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,1536,0.009075199564297993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,256,0.003483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,128,0.0031306666632493338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,32,0.0028927999238173166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,64,0.002899199972550074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,10240,65536,0.36698452631632483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,12288,0.05012906789779663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,7168,0.029820799827575684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,8192,0.033080534140268965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,10240,0.040133333206176756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,16384,0.05899200042088827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,5120,0.02429973284403483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,6144,0.028893866141637164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,4096,0.019681066274642944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,3584,0.01745706597963969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,3072,0.015778133273124696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,2560,0.012295466661453248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,2048,0.01074773371219635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,1536,0.008476799726486206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,768,0.005731200178464254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,1024,0.006810666620731353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,128,0.0031583999594052637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,256,0.0034186666210492453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,512,0.004075733323891958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,8192,65536,0.26080533663431804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,64,0.0028512001037597655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,32,0.00288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,8192,0.032451200485229495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,7168,0.024321067333221435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,10240,0.03641813198725383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,6144,0.021268266439437866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,12288,0.03888533512751262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,16384,0.057323733965555825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,5120,0.020127999782562255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,2560,0.010627200206120808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,1536,0.00811520020167033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,3584,0.014797866344451904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,4096,0.01564800043900808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,2048,0.009914666414260864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,3072,0.012500266234079996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,1024,0.006128000219662985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,768,0.005166933437188466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,128,0.0030794667700926462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,256,0.0033226666351159418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,512,0.003976533313592275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,64,0.0028768000503381092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,32,0.0028021333118279776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,7168,65536,0.22895894050598145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,7168,0.02114026745160421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,8192,0.026190932591756182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,10240,0.03151253263155619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,12288,0.036714665095011395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,16384,0.04816960096359253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,2560,0.009947733084360758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,6144,0.020461867252985634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,3072,0.010568533341089885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,5120,0.016242133577664693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,3584,0.012300800283749897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,2048,0.00865280032157898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,4096,0.013285332918167114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,256,0.0032810665667057036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,1024,0.005578666428724925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,1536,0.007084799806276958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,512,0.003705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,768,0.00429013321797053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,128,0.002995199958483378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,64,0.0028533334533373516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,32,0.002794666588306427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,6144,65536,0.1949343999226888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,8192,0.021985065937042237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,10240,0.02581546703974406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,12288,0.028759467601776122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,7168,0.019653334220250448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,6144,0.0186901330947876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,16384,0.03929920196533203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,5120,0.015280000368754067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,1536,0.006281599899133046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,3584,0.01221226652463277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,2560,0.00834986666838328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,2048,0.007233066856861115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,3072,0.011311999956766764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,4096,0.013351466258366904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,1024,0.0046069333950678505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,5120,65536,0.18213119506835937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,512,0.0035103999078273775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,768,0.003851733356714249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,256,0.003101866692304611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,32,0.002820266783237457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,128,0.0028597332537174227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,64,0.002734933296839396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,8192,0.020307199160257975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,6144,0.01541759967803955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,7168,0.01734293301900228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,10240,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,12288,0.026078933477401735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,16384,0.03318079908688863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,5120,0.014722133676211039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,4096,0.012366933623949687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,3584,0.011455999811490376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,1536,0.005949866771697998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,3072,0.010731732845306397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,2560,0.007860266665617625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,2048,0.006916266679763794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,4096,65536,0.1308511972427368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,1024,0.004196266829967499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,512,0.0034730667869249977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,768,0.003786666691303253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,64,0.002721066772937775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,256,0.0031370667119820913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,32,0.002716800073782603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,128,0.0028714666763941447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,16384,0.031820799907048544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,7168,0.016773333152135216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,6144,0.014166399836540222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,12288,0.02327466607093811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,10240,0.02164586583773295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,8192,0.018339200814565023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3584,65536,0.12260586420694988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,5120,0.01348373293876648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,4096,0.011770666639010111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,3584,0.010518399874369304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,2560,0.007521066566308339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,3072,0.009944533308347065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,2048,0.006681600213050842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,1024,0.004127999891837438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,1536,0.00591786652803421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,32,0.0027850667635599775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,768,0.0037994667887687682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,512,0.003436800092458725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,256,0.0031317333380381264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,128,0.0028618666032950084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,64,0.0026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,6144,0.012980266412099203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,16384,0.02671146591504415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,7168,0.014290133118629455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,8192,0.015621333320935567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,10240,0.017939200003941856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,3072,65536,0.10299306710561115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,12288,0.020948266983032225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,4096,0.01088533302148183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,5120,0.011766399939854939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,3584,0.009603200356165568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,1024,0.004078933348258337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,3072,0.009108266234397889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,2560,0.007178666690985362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,2048,0.006730666756629944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,1536,0.004948266843954722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,768,0.0037429332733154297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,256,0.0030847998956839246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,512,0.003435733417669932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,32,0.0027242665489514667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,128,0.002846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,64,0.0027072000006834666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2560,65536,0.09394240379333496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,12288,0.01842666665712992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,7168,0.012573867042859396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,6144,0.011277866363525391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,10240,0.015625600020090738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,8192,0.013470932841300964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,5120,0.011333333452542622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,16384,0.022842667500178018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,4096,0.009757866462071735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,1536,0.004636799792448679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,3072,0.008273066580295562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,3584,0.008732799688975017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,2560,0.007044266661008198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,2048,0.0057205334305763245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,1024,0.003940266619126002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,512,0.0033002667129039764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,768,0.0036533333361148832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,256,0.003005866706371307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,128,0.002810666710138321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,64,0.0026528000831604003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,32,0.0026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,2048,65536,0.06731306711832682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,16384,0.018219733238220216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,12288,0.01495253344376882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,10240,0.012684800227483115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,8192,0.011251200238863628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,7168,0.01055999994277954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,6144,0.00981760025024414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,4096,0.00846506655216217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,5120,0.009194667140642803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,3584,0.008134399851163227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,3072,0.007572266459465027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,2560,0.009035733342170716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,1024,0.003945599993069967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,1536,0.006427733103434245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,2048,0.007730133334795634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,512,0.003352533280849457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,768,0.003571200122435888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,64,0.002681600054105123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,65536,0.05387733379999796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,32,0.002682666728893916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,256,0.002937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1536,128,0.0028138667345046996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,10240,0.010863999525705974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,12288,0.011988266309102377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,16384,0.014587733149528503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,7168,0.008899199962615966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,5120,0.008008533219496409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,8192,0.009867733716964722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,4096,0.006738133231798808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,6144,0.008044800162315369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,1024,0.003969066590070724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,3584,0.006939733525117238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,3072,0.006426666676998138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,2560,0.005962666869163513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,2048,0.005303466816743215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,65536,0.03862506548563639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,1536,0.004609066744645437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,768,0.003610666592915853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,512,0.003307733436425527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,128,0.0028757333755493166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,64,0.002681600054105123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,32,0.0027093333502610523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,1024,256,0.0029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,16384,0.011754666765530903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,10240,0.009357866644859315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,12288,0.009659733374913533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,8192,0.008586666981379191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,6144,0.007292800148328145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,7168,0.007961600025494894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,3584,0.006794666747252147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,5120,0.007485866546630859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,4096,0.006404266754786174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,65536,0.029960532983144123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,3072,0.006417066852251689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,1024,0.004916266600290934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,2560,0.008595200379689534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,1536,0.00613013356924057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,2048,0.007354666789372762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,512,0.0037621334195137024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,768,0.0042805333932240805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,64,0.002796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,256,0.003219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,128,0.0029813334345817565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,768,32,0.00278613343834877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,12288,0.008743466933568318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,16384,0.009701333443323771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,8192,0.007309866448243459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,10240,0.008146133522192638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,5120,0.006668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,7168,0.006809600194295247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,6144,0.006306133170922597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,65536,0.022115200757980347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,4096,0.006266666452089946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,3072,0.006481066842873891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,3584,0.006826666494210561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,2048,0.005482666691144307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,2560,0.00617386649052302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,1536,0.0047775998711586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,1024,0.003992533435424169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,512,0.003291733314593633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,64,0.002735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,256,0.003018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,768,0.0036309334139029183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,128,0.0029258665939172106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,512,32,0.002739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,10240,0.006569600105285645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,12288,0.006541866560777028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,16384,0.00726506660381953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,8192,0.006404266754786174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,7168,0.006243200103441874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,65536,0.013727999726931252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,4096,0.007495466868082683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,6144,0.006262399752934774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,5120,0.008053333560625712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,3584,0.006775466601053874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,2560,0.005925333499908448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,3072,0.006381866832574208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,1024,0.003925333420435587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,2048,0.00525439977645874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,1536,0.004588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,768,0.0035306667288144433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,128,0.002765866617361705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,512,0.0031957333286603295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,256,0.002914133419593175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,32,0.002705066651105881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,256,64,0.002631466587384542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,16384,0.0063274666666984555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,10240,0.008027733365694682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,65536,0.01106346646944682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,12288,0.0063285330931345625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,8192,0.00737066666285197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,6144,0.0062165334820747375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,7168,0.0067775999506314594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,3584,0.0066442668437957765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,5120,0.006615466872851054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,4096,0.006227200229962667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,3072,0.0062730665008227035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,1536,0.004622933268547058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,2560,0.005952000121275584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,768,0.003555200000603994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,2048,0.005243733525276184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,1024,0.00388373335202535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,256,0.0029653333127498626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,512,0.0032842665910720824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,64,0.002674133330583572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,128,0.0027744000156720476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,65536,0.008350933591524761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,128,32,0.002674133330583572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,16384,0.007163733243942261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,12288,0.006674133241176605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,10240,0.006631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,7168,0.006293333570162455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,8192,0.006498133142789205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,5120,0.006620799998442332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,6144,0.006086400151252747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,4096,0.006208000083764395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,3584,0.006662400066852569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,1024,0.003700266778469086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,2048,0.004936533172925314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,3072,0.006312533219655354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,2560,0.005910400052865346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,768,0.0033301333586374915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,1536,0.004307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,128,0.002665599932273229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,512,0.0031637333333492277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,64,0.002587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,256,0.0028213332096735638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,64,32,0.0025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,16384,0.006346666812896728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,65536,0.008136533200740814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,12288,0.00626453310251236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,10240,0.0063498665889104204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,7168,0.006110933423042297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,8192,0.006449066599210103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,6144,0.006138666470845541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,4096,0.006037333110968272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,5120,0.006472533444563548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,3584,0.00652266691128413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,2048,0.005294933418432872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,2560,0.005859200159708659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,3072,0.006076799829800924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,1536,0.004557866851488749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,768,0.003469866762558619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,1024,0.0038474666575590765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,128,0.0027200000981489818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,512,0.003156266609827677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,64,0.002537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,256,0.0028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,float16,1,32,32,0.002536533276240031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,4096,3.2177078247070314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,5120,4.026576995849609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,6144,4.897486877441406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,7168,5.650068155924479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,8192,6.5978963216145825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,3584,2.851588185628255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,3072,2.44009272257487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,2560,2.0435338338216145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,2048,1.701349385579427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,1024,0.8940127690633138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,1536,1.304846954345703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,512,0.5536106745402019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,10240,7.934968566894531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,768,0.7204021453857422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,32,0.3611722628275553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,64,0.36857706705729165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,256,0.40776640574137374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,128,0.3719872156778971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,12288,9.687563069661458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,7168,1.4833588918050131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,8192,1.7263200124104816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,16384,12.769760131835938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,6144,1.3021514892578125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,10240,2.1819231669108072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,12288,2.590048980712891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,3072,0.6764469146728516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,3584,0.7696896235148112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,2560,0.5741066614786784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,5120,1.0771349589029948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,4096,0.8861162821451822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,256,0.11343573729197184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,768,0.21650239626566567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,2048,0.4617301305135091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,1536,0.3654506683349609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,512,0.16660374005635578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,1024,0.2632213274637858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,64,0.10108053684234619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,16384,3.471413421630859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,128,0.10265920162200928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,32,0.10071252981821696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,6144,0.9944480260213217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,7168,1.155410130818685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,8192,1.3116128285725912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,10240,1.6471637725830077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,12288,1.9637536366780597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,5120,0.825764274597168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,4096,0.6750069300333659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,3584,0.5898901621500652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,3072,0.5221023877461751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,1024,0.20500799814860024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,2048,0.35838826497395837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,1536,0.2820821444193522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,2560,0.4373152097066243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,16384,2.627879587809245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,768,0.17069226900736492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,256,0.09361493587493896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,512,0.13052586714426678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,128,0.08228586514790853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,64,0.08130026658376058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,32,0.07951040267944336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,7168,1.0168490727742514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,8192,1.1611061096191406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,10240,1.4426347096761067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,12288,1.7278731028238934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,5120,0.7326784133911133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,6144,0.8753952026367188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,4096,0.5954218546549479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,16384,2.26743901570638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,3584,0.5181472142537434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,1536,0.24770132700602213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,2560,0.38453547159830725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,3072,0.4516341209411621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,2048,0.318013858795166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,1024,0.18111573855082194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,768,0.14909226099650066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,64,0.07083306312561036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,32,0.0695690631866455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,256,0.08166613578796386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,128,0.07198826471964517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,512,0.11677546501159668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,8192,0.9581205368041992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,10240,1.2278784434000651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,12288,1.4733194986979166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,65536,13.925283813476563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,16384,1.9494869232177734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,7168,0.8412490844726562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,6144,0.7331296284993489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,5120,0.6085450490315755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,3584,0.42229652404785156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,4096,0.4839882532755534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,3072,0.3662325223286947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,2560,0.3086432139078776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,2048,0.2486944039662679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,1536,0.2128885269165039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,768,0.12364693482716878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,256,0.06913599967956544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,512,0.09342613220214843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,1024,0.15028799374898275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,128,0.060695465405782065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,32,0.05922453403472901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,64,0.05957653522491455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,65536,10.80260721842448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,8192,0.8697429021199545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,10240,1.089731216430664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,12288,1.302948252360026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,7168,0.7748416264851887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,16384,1.7262453715006512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,6144,0.6567882537841797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,4096,0.445798397064209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,5120,0.5499765396118164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,3584,0.39900051752726234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,1536,0.18829545974731446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,2048,0.24119466145833335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,2560,0.29314241409301756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,1024,0.13816426595052084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,3072,0.3455850601196289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,512,0.08606186707814535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,768,0.11291626294453938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,256,0.0632149338722229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,128,0.05555200179417928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,64,0.05516159931818644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,32,0.054339198271433506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,65536,9.50426534016927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,7168,0.7063157399495442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,8192,0.7924362818400066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,10240,0.9770922978719077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,12288,1.1930335998535155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,5120,0.505949878692627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,6144,0.6091967900594075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,4096,0.4030314763387044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,16384,1.5505301157633462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,3584,0.359220282236735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,3072,0.31001173655192055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,2560,0.2671168009440104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,1024,0.12510933081309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,2048,0.21771732966105142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,1536,0.1699541409810384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,768,0.10276906490325928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,512,0.0784447987874349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,128,0.050054399172465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,256,0.058024533589680995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,64,0.04982399940490723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,65536,7.865083821614583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,32,0.04895573457082113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,7168,0.6136618932088216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,8192,0.6972618738810221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,10240,0.8652704238891602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,12288,1.0516981124877929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,6144,0.5309023857116699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,5120,0.4469312032063802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,4096,0.3647029240926107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,16384,1.3682603200276693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,3584,0.32578986485799155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,3072,0.281607468922933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,2560,0.23953599929809571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,768,0.09458239873250326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,2048,0.19803519248962403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,1024,0.1146666685740153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,512,0.07455360094706218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,1536,0.15495039621988932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,256,0.053928534189860024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,64,0.04420906702677409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,128,0.044761598110198975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,32,0.044343467553456625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,65536,7.051039123535157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,8192,0.6013792037963868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,7168,0.5348864237467448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,10240,0.7468714396158854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,12288,0.9040778477986654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,4096,0.3146773338317871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,16384,1.1812554677327474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,6144,0.4581568082173665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,5120,0.3878709475199381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,3584,0.28144747416178384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,1536,0.1342591921488444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,1024,0.09903679688771566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,2560,0.20865279833475747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,2048,0.17169814109802245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,3072,0.24412585894266764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,256,0.04626239935557048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,512,0.0627946654955546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,64,0.039510401089986165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,128,0.040004265308380124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,32,0.039095465342203775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,768,0.08166399796803793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,65536,6.310565185546875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,8192,0.5621930440266927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,10240,0.6923466364542643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,7168,0.4939541180928548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,12288,0.8245642979939779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,6144,0.42878293991088867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,5120,0.3592543919881185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,4096,0.29348694483439125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,16384,1.0942347208658854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,3584,0.2599818706512451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,2560,0.19399466514587402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,1536,0.12472106615702312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,2048,0.16049280166625976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,1024,0.09251840114593506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,3072,0.22765866915384927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,768,0.07709759871164958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,128,0.03818986813227336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,64,0.03780906597773234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,256,0.04402559995651245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,512,0.059239466985066734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,32,0.03754133383433024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,65536,5.65548350016276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,8192,0.514958922068278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,7168,0.45794986089070633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,10240,0.6381343841552735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,12288,0.7626752217610677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,4096,0.2693813323974609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,5120,0.33429225285847985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,6144,0.39152533213297525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,16384,1.0149802525838216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,3584,0.24076053301493325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,2048,0.14862507184346516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,2560,0.1808832009633382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,1024,0.08660266399383545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,3072,0.20967680613199868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,1536,0.11618560155232746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,512,0.05615466833114624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,768,0.07150293191274007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,256,0.042513068517049155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,64,0.03595626751581828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,128,0.03684586683909098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,32,0.035352532068888345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,65536,4.750949096679688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,7168,0.4201621373494466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,8192,0.4778101285298665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,10240,0.5853439966837566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,12288,0.6993471781412761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,5120,0.30971412658691405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,4096,0.2523711999257406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,6144,0.3647253354390462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,16384,0.9230506896972657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,3584,0.22411840756734214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,3072,0.19680746396382648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,1024,0.08058880170186361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,2048,0.13952639897664387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,768,0.06749119758605956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,2560,0.1679360071818034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,1536,0.10898239612579345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,128,0.033045333623886106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,512,0.053141331672668456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,256,0.040030932426452635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,32,0.03216106692949931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,64,0.03239573240280151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,65536,4.465873209635417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,8192,0.428331724802653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,7168,0.3802538553873698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,10240,0.5291285196940104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,12288,0.6315807978312175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,5120,0.27955201466878254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,6144,0.32848320007324217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,4096,0.22778773307800293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,16384,0.8338581085205078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,3584,0.20252906481424965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,1024,0.07311680316925048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,3072,0.17723840077718098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,1536,0.09821120103200277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,2048,0.12604373296101887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,2560,0.15174293518066406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,256,0.03467413187026978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,128,0.029452800750732422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,768,0.06042240063349406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,512,0.047014399369557695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,64,0.02911893328030904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,32,0.027882667382558186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,65536,4.018523661295573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,7168,0.3405002593994141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,8192,0.3868149439493815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,10240,0.4754826545715332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,12288,0.5680629094441731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,5120,0.25125865936279296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,4096,0.20517013867696127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,16384,0.748643175760905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,6144,0.2958847999572754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,3584,0.18296960194905598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,1024,0.06521493196487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,1536,0.08925013542175293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,2048,0.11445226669311523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,3072,0.15967466036478678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,2560,0.13725652694702148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,768,0.05326186815897623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,256,0.030899200836817426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,128,0.026230400800704955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,64,0.025195733706156416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,512,0.04142080148061116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,32,0.025101866324742633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,65536,3.6344660441080725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,7168,0.30032428105672204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,8192,0.34041172663370767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,10240,0.4190677324930827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,12288,0.5001194636027019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,6144,0.2600874741872152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,5120,0.22097600301106773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,4096,0.18065279324849445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,16384,0.66047789255778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,3584,0.1608469327290853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,3072,0.14116053581237792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,2560,0.12014933427174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,1024,0.052843733628590905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,1536,0.07585919698079427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,2048,0.09956160386403402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,768,0.0429909348487854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,512,0.03306026657422383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,256,0.024909865856170655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,128,0.021415466070175172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,64,0.021169066429138184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,32,0.020326399803161622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,65536,3.317801666259766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,7168,0.28401813507080076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,8192,0.31967252095540366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,10240,0.3948416074117025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,12288,0.47223574320475264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,5120,0.20753706296284996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,6144,0.24607146581014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,4096,0.16959786415100098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,16384,0.6218730926513671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,3584,0.15096213022867838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,2560,0.1136309305826823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,3072,0.1326325337092082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,2048,0.09277333418528239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,1536,0.07000426451365152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,1024,0.04720746676127116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,512,0.030488532781600953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,768,0.03828266859054565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,256,0.023715200026830037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,128,0.020106667280197145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,64,0.019351466496785482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,32,0.019355734189351402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,65536,2.93570556640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,7168,0.26226666768391926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,8192,0.2973141352335612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,10240,0.3669461250305176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,12288,0.43649813334147136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,5120,0.19316266377766927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,6144,0.22722773551940917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,4096,0.15806719462076824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,16384,0.5752000172932943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,3584,0.1412000020345052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,3072,0.12328106562296551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,2048,0.08527999718983968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,1024,0.042778666814168295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,1536,0.0629589319229126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,2560,0.10481706460316975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,768,0.034441598256429035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,32,0.017992534240086875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,256,0.02221333384513855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,64,0.01832533280054728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,128,0.0191103994846344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,512,0.028394667307535808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,65536,2.585388692220052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,7168,0.24387307167053224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,8192,0.2761962572733561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,10240,0.3405290603637695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,12288,0.4065375963846843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,5120,0.17905492782592775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,4096,0.14663039843241374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,6144,0.2118197282155355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,16384,0.535102907816569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,3584,0.13119680086771648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,1024,0.038482133547465006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,2048,0.07802133560180664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,3072,0.11400746504465739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,1536,0.05619093179702759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,2560,0.09661226272583008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,64,0.01728746692339579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,256,0.020719999074935914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,512,0.026245333751042682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,128,0.017658666769663493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,768,0.03203199903170268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,768,65536,2.4504063924153643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,32,0.01713599960009257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,7168,0.2380074659983317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,8192,0.2710549354553223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,10240,0.33503573735555015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,12288,0.39759254455566406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,5120,0.17511040369669598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,6144,0.2067413330078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,4096,0.1431978702545166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,16384,0.524620787302653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,3584,0.12674773534138997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,1536,0.0524885336558024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,1024,0.03583253224690755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,2560,0.09296320279439291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,3072,0.11040639877319336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,2048,0.07459200223286946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,512,65536,2.2650410970052084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,128,0.01717653274536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,64,0.016615466276804606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,256,0.01978666583697001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,512,0.025012266635894776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,768,0.030242133140563964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,32,0.015966932972272238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,7168,0.2376906712849935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,8192,0.26965014139811194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,10240,0.332535457611084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,12288,0.39636052449544273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,6144,0.2058272043863932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,5120,0.17451413472493488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,4096,0.14205973943074543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,16384,0.5246517181396484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,3584,0.12617279688517252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,3072,0.11005120277404785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,2560,0.09236480394999186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,1024,0.0344703992207845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,2048,0.07350613276163737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,1536,0.05171626806259155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,256,65536,2.1129290262858076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,768,0.03039146661758423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,256,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,512,0.025013333559036253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,64,0.016148266196250916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,128,0.017066667477289833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,32,0.0158869336048762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,7168,0.23660052617390953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,8192,0.2688511848449707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,10240,0.33218558629353845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,12288,0.3950005213419596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,6144,0.20509120623270669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,5120,0.17424960136413575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,4096,0.14216960271199544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,16384,0.5207541465759278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,3584,0.12594026724497479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,3072,0.10939199924468994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,2560,0.09216639995574952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,1024,0.03542933464050293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,1536,0.050578133265177405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,2048,0.07234773635864258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,768,0.02997013330459595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,512,0.024590933322906496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,32,0.015258666872978211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,128,65536,2.0694325764973955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,128,0.016768000523249307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,256,0.019451733430226645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,64,0.016013866662979125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,6144,2.400029754638672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,7168,2.75427729288737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,8192,3.169272613525391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,10240,4.084909820556641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,64,65536,2.053558349609375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,12288,4.752380879720052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,5120,1.993019739786784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,4096,1.5098101298014321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,3584,1.3398485819498698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,2048,0.8060288111368814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,1024,0.45282026926676433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,3072,1.1672981262207032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,2560,0.9937962849934896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,768,0.37069972356160485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,1536,0.6300959904988607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,512,0.28978134791056315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,256,0.21155519485473634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,128,0.19498027165730794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,64,0.19151466687520344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,32,0.19268159866333007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,16384,6.402669779459636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,7168,0.7571039835611979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,8192,0.8838965098063151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,6144,0.6541119893391927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,10240,1.0927647908528646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,12288,1.2858100891113282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,4096,0.4497952143351237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,3584,0.3912960052490234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,5120,0.547327995300293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,3072,0.34175039927164713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,16384,1.693202082316081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8192,32,65536,2.0526634216308595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,2560,0.2906293233235677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,512,0.08648426532745361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,256,0.0652565320332845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,1024,0.13709759712219238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,128,0.05928639968236288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,1536,0.19098560015360516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,768,0.11393173535664876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,2048,0.24364800453186036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,64,0.05933653513590494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,32,0.05944639841715495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,6144,0.5093152046203613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,7168,0.5806602478027344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,8192,0.6742730458577474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,10240,0.8332970937093099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,12288,0.9851584116617838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,3584,0.3044735908508301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,4096,0.3420960108439127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,5120,0.4266784032185872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,3072,0.26403840382893884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,2560,0.22387092908223472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,16384,1.3016394297281901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,1024,0.11145173708597819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,1536,0.14850667317708333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,2048,0.18785600662231444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,768,0.09294400215148926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,64,0.04939519961675008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,128,0.04914666811625163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,256,0.0544981320699056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,512,0.07057493527730306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,32,0.049141331513722734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,7168,0.517632007598877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,8192,0.5782976150512695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,10240,0.7138261159261068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,12288,0.8714197158813477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,6144,0.4478282610575358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,16384,1.1323210398356118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,5120,0.3720149358113607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,4096,0.30330559412638347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,3584,0.2712298711140951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,1024,0.10155733426411946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,1536,0.1333408037821452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,2560,0.19981333414713542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,2048,0.1666111946105957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,3072,0.23466666539510092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,256,0.050760531425476076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,512,0.06745813687642416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,768,0.08503680229187012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,128,0.04419093529383342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,64,0.04507199923197429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,32,0.04462399880091349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,8192,0.48386561075846357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,65536,7.1591145833333325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,10240,0.6079903920491536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,12288,0.7166720072428385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,16384,0.962005360921224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,6144,0.3723509470621745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,7168,0.42638505299886065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,5120,0.3142079989115397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,4096,0.25429439544677734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,2560,0.16846933364868164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,3584,0.22815465927124023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,2048,0.14079680442810058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,1536,0.11205973625183105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,3072,0.1961674690246582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,1024,0.08466560045878092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,256,0.04341760079065959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,512,0.05539520184199015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,768,0.07127253214518228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,128,0.03904853264490764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,64,0.03917226791381836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,32,0.03914346694946289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,65536,5.351956176757812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,8192,0.43962453206380203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,7168,0.3880277315775553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,10240,0.5436255772908528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,12288,0.6509482701619466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,4096,0.2319712003072103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,16384,0.8748981475830078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,5120,0.2843935966491699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,6144,0.3369162559509277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,3584,0.20557012557983398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,65536,4.647542317708334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,3072,0.17901867230733234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,2560,0.15486399332682294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,1024,0.07821226914723714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,1536,0.10298986434936523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,2048,0.12792960007985432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,256,0.040046934286753336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,512,0.05201706488927206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,128,0.03699306646982829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,768,0.06663360198338827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,64,0.03763519922892253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,32,0.037539199988047285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,7168,0.34821974436442055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,8192,0.39563627243041993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,10240,0.4922218640645345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,12288,0.5842570622762044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,16384,0.7699210484822591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,4096,0.21124693552652993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,5120,0.25564160346984866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,6144,0.3016149202982584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,65536,3.8510358174641928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,3584,0.186137596766154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,3072,0.16232746442159016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,1024,0.07209386825561523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,1536,0.09421760241190592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,2048,0.11708266735076904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,2560,0.14039467175801595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,768,0.060244266192118326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,512,0.048214399814605714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,128,0.03515733480453491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,256,0.03889919916788737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,64,0.035613866647084554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,32,0.03466666539510091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,7168,0.3163893381754557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,8192,0.35605974197387696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,10240,0.4402336120605469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,12288,0.5198399861653645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,16384,0.688860829671224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,6144,0.27560532887776695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,5120,0.2322474638621012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,4096,0.19039680163065592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,3584,0.16797760327657063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,65536,3.5394996643066405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,3072,0.14697813987731934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,1536,0.08700479666392008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,512,0.04654719829559326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,2048,0.10646186669667561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,1024,0.06635520060857138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,2560,0.12664106686909993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,768,0.05634133418401083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,128,0.031228800614674885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,32,0.031623466809590654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,256,0.036423468589782716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,64,0.03105173309644063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,7168,0.27433598836263023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,8192,0.30975252787272134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,10240,0.38009599049886067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,12288,0.4521034558614095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,6144,0.23897813161214193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,16384,0.601306660970052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,5120,0.20298666954040528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,3584,0.1477717399597168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,4096,0.16573227246602376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,65536,3.177637227376302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,1536,0.07634133497873942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,1024,0.05836693445841471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,2560,0.11050346692403157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,768,0.04978239933649699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,2048,0.09368213017781576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,3072,0.12784639994303387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,512,0.04018453359603882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,64,0.027641600370407103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,128,0.027241599559783936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,256,0.03161493341128031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,32,0.028040534257888793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,7168,0.26594346364339194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,8192,0.29970451990763347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,10240,0.37082026799519857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,12288,0.44063574473063155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,6144,0.23130133946736656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,5120,0.19678187370300293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,4096,0.1614026705423991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,16384,0.5728362401326497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,3584,0.14390400250752766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,65536,2.762655893961589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,1024,0.057102934519449866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,1536,0.0743018627166748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,2048,0.09152426719665527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,768,0.043996798992156985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,2560,0.10784213542938233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,3072,0.12539199988047284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,512,0.03569493293762207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,256,0.029309866825739543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,128,0.026918399333953857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,64,0.02698880036671956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,32,0.0273087998231252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,7168,0.2360703945159912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,8192,0.26713174184163413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,10240,0.32932907740275064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,12288,0.38814080556233727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,4096,0.1431850592295329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,16384,0.5163498560587565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,6144,0.20500052769978844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,5120,0.17456852595011393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,65536,2.376881154378255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,3584,0.1277216037114461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,768,0.0422815998395284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,1536,0.06642773151397705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,1024,0.050076798597971595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,2560,0.09782506624857584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,2048,0.08222080071767171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,3072,0.1119477351506551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,512,0.035035733381907144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,64,0.025166932741800947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,256,0.028102399905522664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,128,0.025067732731501265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,32,0.024680533011754355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,7168,0.21481812795003258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,8192,0.24321494102478028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,10240,0.299291737874349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,12288,0.35886081059773767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,5120,0.15902613004048666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,6144,0.18741973241170246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,3584,0.1175605297088623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,16384,0.4668416023254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,4096,0.131113600730896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,65536,2.2495999654134113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,1024,0.043095465501149496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,1536,0.05832533439000448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,768,0.03712960084279378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,2048,0.07364373207092285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,2560,0.08704213301340738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,3072,0.10105600357055664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,64,0.02177919944127401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,256,0.02437333265940348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,512,0.030921600262324017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,128,0.021782400210698445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,32,0.021677867571512858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,7168,0.19464534123738605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,8192,0.21977599461873373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,10240,0.2706037203470866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,12288,0.32087786992390954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,65536,1.9951627095540363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,5120,0.1453813393910726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,3584,0.10593706766764324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,6144,0.17006826400756836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,16384,0.42524054845174153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,4096,0.11922346750895182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,1024,0.03851199944814046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,768,0.03373973369598389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,2048,0.0645845333735148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,1536,0.05100800196329752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,2560,0.07876906394958497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,3072,0.09247573216756186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,512,0.02746773362159729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,256,0.02251519958178202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,128,0.020578134059906005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,64,0.020253866910934448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,32,0.020218666394551596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,7168,0.17856426239013673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,8192,0.20058986345926919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,10240,0.2460458596547445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,12288,0.29177494049072267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,4096,0.10935680071512859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,16384,0.3857994715372721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,65536,1.8371754964192708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,5120,0.13231253623962402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,6144,0.15533653895060223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,3584,0.09641599655151367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,3072,0.08335253397623697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,2560,0.07091306845347087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,2048,0.05695786476135254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,768,0.02985493342081706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,1024,0.035046398639678955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,1536,0.045134933789571126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,256,0.021331199010213218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,512,0.025624533494313557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,128,0.019035732746124266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,32,0.018914133310317993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,64,0.01913493275642395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,7168,0.15844586690266926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,8192,0.17805760701497395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,10240,0.21801493962605795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,12288,0.25782079696655275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,16384,0.3396832148234049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,65536,1.6405471801757812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,6144,0.138264528910319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,5120,0.11765866279602051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,4096,0.0955456018447876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,3584,0.08430399894714355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,768,0.026951466004053754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,1024,0.030241066217422487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,1536,0.0389631986618042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,2048,0.048283731937408446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,2560,0.059432534376780186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,3072,0.07247573534647624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,512,0.023290665944417317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,256,0.019419733683268228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,128,0.01802133321762085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,64,0.017921066284179686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,32,0.01757226586341858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,7168,0.14930666287740071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,8192,0.16802560488382975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,10240,0.2057408014933268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,12288,0.24315733909606935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,16384,0.31857598622639977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,65536,1.4817248026529948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,6144,0.13008106549580892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,5120,0.11145386695861817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,3584,0.07809706528981528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,4096,0.09011946519215902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,3072,0.06658879915873209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,2560,0.05544000069300333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,512,0.022392533222834268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,1024,0.02853013277053833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,2048,0.044188801447550455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,768,0.025733333826065064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,1536,0.03638399839401245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,128,0.01734506686528524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,256,0.01890666683514913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,64,0.016898133357365928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,32,0.016774400075276693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,7168,0.1396447976430257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,8192,0.15697387059529622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,10240,0.19127039909362792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,12288,0.2257962703704834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,65536,1.2927647908528646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,16384,0.29596160252889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,3584,0.07310293515523275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,4096,0.08412480354309082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,5120,0.10367039839426677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,6144,0.1216480016708374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,3072,0.06118079821268717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,2048,0.04112533330917358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,1024,0.027346134185791016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,1536,0.033720533053080246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,2560,0.05081386566162109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,768,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,256,0.01821546753247579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,512,0.021542400121688843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,128,0.016730666160583496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,64,0.01648853321870168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,32,0.016747732957204185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,7168,0.13338665962219237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,8192,0.15007039705912273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,10240,0.1820576032002767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,12288,0.2136533260345459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,768,65536,1.2270848592122396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,16384,0.28376534779866536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,5120,0.09783893426259359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,4096,0.07811306317647299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,6144,0.11569279829661053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,3584,0.0677781343460083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,2560,0.04549653530120849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,3072,0.0562549352645874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,768,0.023230934143066408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,1536,0.0314794659614563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,1024,0.026338134209314985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,2048,0.037281068166097005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,512,0.020568533738454183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,256,0.01770240068435669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,64,0.015875200430552162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,128,0.016432000199953715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,32,0.015986133615175882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,7168,0.12746346791585286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,8192,0.14311253229777018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,12288,0.2078538735707601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,10240,0.17533973058064778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,512,65536,1.1490186055501304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,16384,0.2757173220316569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,3584,0.06334720055262247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,4096,0.07422506809234619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,5120,0.09336533546447753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,6144,0.11095253626505534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,2560,0.043755733966827394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,3072,0.052490667502085364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,1536,0.0306005338827769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,768,0.02260266741116842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,1024,0.025135999917984007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,2048,0.03515093326568604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,512,0.019887999693552653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,256,0.01699413259824117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,128,0.015851733088493348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,64,0.015787733594576518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,32,0.015826132893562318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,7168,0.12628906567891438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,8192,0.14311680793762208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,10240,0.17438826560974122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,12288,0.2058272043863932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,256,65536,1.0732266743977865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,4096,0.0736682653427124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,16384,0.2732074737548828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,6144,0.1098090648651123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,5120,0.09281706809997559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,3584,0.06227840185165405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,3072,0.05176426569620768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,2560,0.04316693147023519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,2048,0.03579306602478027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,1024,0.02525866627693176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,1536,0.03022293249766032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,512,0.019817600647608437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,768,0.022446932395299275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,128,0.015709867080052696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,256,0.016897066434224447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,64,0.015528532862663268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,32,0.015256533026695251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,7168,0.1285258690516154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,8192,0.14520959854125975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,10240,0.17653652826944988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,12288,0.20841066042582193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,4096,0.07297386328379313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,128,65536,1.0407701492309571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,16384,0.2715402603149414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,5120,0.09255253473917643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,6144,0.11226452986399334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,3584,0.06216959953308106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,1024,0.024946133295694985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,768,0.02218666672706604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,1536,0.030188800891240437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,2048,0.03479359944661458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,3072,0.0511680006980896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,2560,0.04248960018157959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,256,0.01670080025990804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,512,0.019643733898798622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,32,0.015184000134468079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,128,0.015852800011634825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,64,0.014974932869275412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,64,65536,1.0356042861938477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,6144,1.1335946400960286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,7168,1.3344170888264975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,8192,1.6085205078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,5120,0.9716447830200196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,10240,1.9443295796712239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,4096,0.7718613306681316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,12288,2.2665110270182294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,3072,0.5978975931803385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,3584,0.6802165349324544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4096,32,65536,1.0303658803304037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,768,0.19841279983520507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,512,0.15568000475565594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,256,0.11842133204142254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,1024,0.23720854123433432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,2560,0.5061375935872395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,2048,0.40680958429972336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,128,0.10464213689168293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,1536,0.3220575968424479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,64,0.10429226557413737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,32,0.10215573310852051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,16384,3.151835632324219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,6144,0.32543999354044595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,7168,0.375001589457194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,10240,0.5354421615600586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,8192,0.4241333325703939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,12288,0.6261386871337891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,5120,0.2738815943400065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,3072,0.17701759338378906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,2560,0.15056853294372557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,4096,0.22490986188252768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,3584,0.19945279757181805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,1536,0.10181972980499268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,1024,0.07795093059539795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,16384,0.8513482411702474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,2048,0.12596159776051838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,768,0.06543466647466024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,512,0.05222400029500326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,64,0.039084800084431964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,256,0.04196373224258423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,128,0.03845013380050659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,32,0.03900479873021444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,6144,0.2572490692138672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,7168,0.294708251953125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,8192,0.33638505935668944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,10240,0.42159039179484054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,12288,0.49877974192301433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,3072,0.1430474599202474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,5120,0.21833386421203613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,3584,0.16103572845458985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,4096,0.17914560635884602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,16384,0.6533407847086589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,2560,0.12255040009816487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,2048,0.10414613087972005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,128,0.03454826672871907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,1536,0.08398719628651938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,768,0.055048533280690516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,512,0.044544001420338944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,1024,0.06440426508585612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,256,0.036558934052785236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,64,0.035079467296600345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,32,0.03496640125910441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,7168,0.26273600260416663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,8192,0.299124272664388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,10240,0.366323184967041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,12288,0.4344181378682454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,5120,0.19447147051493327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,16384,0.5708373387654622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,6144,0.22676159540812174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,4096,0.16045546531677246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,3584,0.14397974014282228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,3072,0.12799253463745117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,2560,0.10987520217895508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,2048,0.09266773064931234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,768,0.0508565346399943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,1024,0.059531732400258386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,1536,0.07608319918314616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,512,0.04257920185724894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,128,0.030649600426355998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,256,0.03411519924799601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,64,0.03170773386955261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,32,0.03153706590334575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,65536,3.419916788736979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,8192,0.2502986590067545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,10240,0.30792214075724283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,7168,0.222542937596639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,12288,0.36759678522745765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,16384,0.48555733362833664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,3584,0.12132586638132732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,4096,0.13537599245707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,6144,0.19239253997802735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,3072,0.10810453097025555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,5120,0.16412053108215333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,65536,2.595362091064453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,1024,0.05127466519673666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,2560,0.09356799920399984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,2048,0.07963199615478515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,768,0.04413439830144246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,1536,0.06514346599578857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,512,0.037573333581288657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,128,0.027116799354553224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,256,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,64,0.0269375999768575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,32,0.02781440019607544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,7168,0.19784213701883951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,8192,0.2230133374532064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,10240,0.2784351984659831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,12288,0.3315349260965983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,65536,2.2729718526204428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,4096,0.12086719671885174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,5120,0.14443413416544598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,16384,0.43054825464884444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,6144,0.17048853238423664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,3584,0.10772586663564046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,3072,0.09592639605204265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,2048,0.0695039987564087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,2560,0.08304320176442465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,1536,0.05670613447825114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,1024,0.04339413245519002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,768,0.03622293472290039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,128,0.02598506609598796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,256,0.027847466866175334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,64,0.026280534267425538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,512,0.03148266673088074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,32,0.02611946662267049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,7168,0.18222826321919758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,8192,0.20492480595906576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,10240,0.2524991989135742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,65536,1.8710709889729817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,12288,0.2993226687113444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,16384,0.3965973218282064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,5120,0.13481705983479816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,3072,0.09037866592407226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,6144,0.15785706837972005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,4096,0.11213866869608562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,3584,0.10094292958577473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,2560,0.07973972956339517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,2048,0.06761600176493326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,1536,0.05557119846343994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,512,0.03263253370920817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,1024,0.04345279932022095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,768,0.038040534655253096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,256,0.02635093331336975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,128,0.024363734324773154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,64,0.024681599934895833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,32,0.024571732680002848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,8192,0.1824757258097331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,7168,0.16043093999226887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,12288,0.26899518966674807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,10240,0.2254197279612223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,65536,1.748488489786784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,16384,0.3479893366495768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,6144,0.13854079246520995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,5120,0.11810239950815837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,4096,0.1004149357477824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,3584,0.09032853444417319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,3072,0.07998080253601074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,1024,0.03786666790644328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,2560,0.06991039911905925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,2048,0.05876906712849935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,1536,0.04780906836191813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,768,0.032689066727956136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,512,0.028207999467849732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,256,0.023562665780385336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,128,0.021591466665267945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,64,0.0215338667233785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,32,0.021466666460037233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,7168,0.13976319630940753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,8192,0.1585898717244466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,10240,0.19479680061340332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,12288,0.2337951978047689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,65536,1.5525824228922525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,5120,0.10460053284962971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,16384,0.303218142191569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,6144,0.12131093343098957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,3584,0.07964373429616292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,3072,0.07065066496531168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,4096,0.08768746852874756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,1024,0.03314560055732727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,2560,0.05997759898503622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,2048,0.05016533136367798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,1536,0.0414133350054423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,768,0.02898026704788208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,512,0.02453546722730001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,256,0.021228800217310585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,128,0.019766400257746376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,32,0.01983573238054911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,64,0.020004266500473024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,7168,0.13788480758666993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,8192,0.15616426467895508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,65536,1.36398073832194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,10240,0.19285866419474285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,12288,0.22987732887268067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,16384,0.29733867645263673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,6144,0.12030613422393799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,3584,0.07712213198343912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,3072,0.06814613342285156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,5120,0.10251413186391194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,4096,0.0858890692392985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,1024,0.0329365332921346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,1536,0.04057919979095459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,2560,0.05866346756617228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,2048,0.04874560038248698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,512,0.0248799999554952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,768,0.028705066442489623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,256,0.02139520049095154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,128,0.019692800442377725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,32,0.019951999187469482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,64,0.02002133329709371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,7168,0.12304213047027587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,10240,0.17074666023254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,65536,1.1676949818929037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,8192,0.13749225934346515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,12288,0.20387093226114908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,16384,0.26438719431559243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,3584,0.06843626499176025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,4096,0.077565868695577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,5120,0.09252693653106689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,6144,0.10722133318583171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,3072,0.06041706800460815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,2560,0.05123840173085531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,512,0.022973867257436116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,1024,0.029638399680455525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,768,0.026667733987172444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,1536,0.03683306773503621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,2048,0.04345173438390096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,128,0.01883093317349752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,256,0.020173867543538414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,64,0.018894932667414346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,32,0.018901334206263224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,65536,1.1486186981201172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,8192,0.12862079938252766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,7168,0.11355413595835369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,10240,0.1565930684407552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,12288,0.1863594690958659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,16384,0.2417461395263672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,6144,0.0986016035079956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,5120,0.08491946856180826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,4096,0.07001492977142335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,3072,0.054048001766204834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,3584,0.06188586552937826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,768,0.024820266167322795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,2560,0.04621119896570842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,2048,0.039927466710408525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,1024,0.02828693389892578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,1536,0.033880531787872314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,512,0.022256000836690267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,256,0.019708800315856933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,128,0.018310399850209554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,64,0.018028799692789713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,32,0.01839146614074707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,65536,1.015174420674642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,12288,0.17000212669372558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,8192,0.11648106575012207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,7168,0.10329279899597169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,10240,0.1413152058919271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,16384,0.2175840059916178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,5120,0.0773034652074178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,6144,0.0899733304977417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,4096,0.061685331662495936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,3072,0.04787413279215495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,3584,0.054440534114837645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,2048,0.036822398503621415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,768,0.023179733753204347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,2560,0.04224746624628703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,1024,0.02523733377456665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,512,0.020753065745035805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,1536,0.030844799677530926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,256,0.018694400787353516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,128,0.017385600010553996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,32,0.017222400506337485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,64,0.017514665921529136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,65536,0.9156821568806967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,12288,0.1550805409749349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,8192,0.10663786729176838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,7168,0.09472426573435465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,10240,0.12958613236745198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,16384,0.19865600268046063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,6144,0.08141333262125651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,5120,0.06895573139190674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,4096,0.054663467407226565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,3584,0.04895786841710408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,3072,0.043350398540496826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,1536,0.028677332401275634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,2560,0.03861120144526164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,2048,0.033557331562042235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,256,0.018040533860524496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,1024,0.023948800563812257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,512,0.019844265778859456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,768,0.021919999519983926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,128,0.016872533162434897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,64,0.01699733336766561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,32,0.01697173317273458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,65536,0.8275530497233072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,7168,0.08456640243530274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,8192,0.09558400313059488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,10240,0.11710399786631268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,12288,0.13656959533691407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,6144,0.07227199872334798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,16384,0.17614399592081706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,5120,0.05982720057169596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,4096,0.047858134905497236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,3072,0.03840746482213338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,3584,0.042940799395243326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,2560,0.033905065059661864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,2048,0.030105600754419964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,1536,0.026178133487701417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,1024,0.022452267011006673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,64,0.016482133666674295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,768,0.020415999492009482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,512,0.018871466318766274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,128,0.01630400021870931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,256,0.01756160060564677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,32,0.01603626708189646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,65536,0.7436042785644531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,8192,0.09069973627726237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,7168,0.07842986583709717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,10240,0.10967466831207276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,12288,0.1292138655980428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,6144,0.06681599617004394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,5120,0.05495680173238119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,16384,0.1659701347351074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,3584,0.03976320028305054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,4096,0.04424533446629842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,2048,0.0283135990301768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,3072,0.03575573364893596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,2560,0.03211626609166463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,512,0.018180267016092936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,1536,0.025120000044504803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,1024,0.021691733598709108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,768,0.019679999351501463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,64,0.016038399934768677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,256,0.016816000143686928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,128,0.01592853367328644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,32,0.015768532951672874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,65536,0.6729525248209636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,8192,0.08602133591969809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,7168,0.07455466588338217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,10240,0.10488533178965251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,12288,0.1226965347925822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,6144,0.06165333191553751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,16384,0.15938560167948407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,3072,0.03354026476542155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,5120,0.05094079971313477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,3584,0.03762773275375366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,4096,0.041389866669972734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,2560,0.03036373257637024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,1536,0.024125866095225015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,2048,0.02719786763191223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,128,0.015635200341542563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,768,0.01953386664390564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,1024,0.021050665775934854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,256,0.016170666615168253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,512,0.017682133118311565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,32,0.015494400262832641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,64,0.015643733739852905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,768,65536,0.6356192270914713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,7168,0.06751360098520914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,8192,0.07929600079854329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,10240,0.0986250638961792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,12288,0.11606613000233967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,6144,0.05597226619720459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,16384,0.1550528049468994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,4096,0.03818026781082153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,5120,0.04545706510543823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,3072,0.03163306713104248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,3584,0.035478401184082034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,2560,0.029316266377766926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,2048,0.025971200068791705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,256,0.01602666676044464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,1536,0.02302186687787374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,768,0.018396800756454466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,512,0.01744106610616048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,1024,0.020261333386103312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,128,0.01536853313446045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,64,0.01544319987297058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,32,0.015310933192571005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,512,65536,0.5888416290283203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,7168,0.06658986806869507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,8192,0.07881706555684408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,10240,0.0981941302617391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,6144,0.05480106671651205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,12288,0.11449706554412842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,16384,0.14774187405904132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,5120,0.045550934473673504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,4096,0.038488535086313884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,3072,0.0313920001188914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,2560,0.028823467095692952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,3584,0.034195200602213545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,2048,0.025921066602071125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,1536,0.023077332973480226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,768,0.018504534165064493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,1024,0.020463999112447104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,256,0.015960533420244852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,512,0.017195733388264973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,64,0.01535040040810903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,128,0.015127467115720114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,32,0.01476693352063497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,256,65536,0.5423893610636393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,10240,0.09597760041554769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,7168,0.06548373301823934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,6144,0.055043200651804604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,8192,0.07669653097788492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,12288,0.11283946832021077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,16384,0.14582187334696453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,5120,0.044299733638763425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,3584,0.03479573329289754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,4096,0.03691626787185669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,3072,0.03097386757532756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,2560,0.02833919922510783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,1536,0.02265066703160604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,2048,0.02571733395258586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,1024,0.020117332537968956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,128,0.015069866180419922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,768,0.018467199802398682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,512,0.017181867361068727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,256,0.015821866194407144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,64,0.015129599968592325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,32,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,128,65536,0.5300256093343099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,8192,0.0756053368250529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,6144,0.054106668631235755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,10240,0.09512213071187338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,7168,0.06493013302485148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,12288,0.11226346492767333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,16384,0.14493014017740885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,4096,0.035930665334065755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,3584,0.033515731493632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,2560,0.028325333197911577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,3072,0.031176533301671343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,5120,0.04392640193303426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,2048,0.02540053327878316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,1024,0.01986560026804606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,512,0.017059199015299478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,1536,0.022315732638041177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,768,0.018209065993626913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,256,0.01584106683731079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,128,0.01499626636505127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,64,0.015004799763361613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,32,0.015012266238530478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,64,65536,0.5285674730936687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2048,32,65536,0.5249557177225749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,5120,0.5105941454569499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,6144,0.5989930470784505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,7168,0.6819509506225586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,8192,0.7935317357381184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,3584,0.3656031926472982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,4096,0.40668907165527346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,10240,0.9640725453694662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,3072,0.3134197235107422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,1024,0.1273525317509969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,2048,0.22311253547668458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,768,0.10726400216420491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,2560,0.26538987159729005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,1536,0.17854080200195313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,256,0.06652906735738119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,12288,1.168658192952474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,512,0.08209706942240397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,128,0.06212160189946493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,64,0.06265920003255208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,32,0.062174932161966956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,6144,0.16985707283020018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,7168,0.19355200131734213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,16384,1.6333600362141927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,8192,0.22037119865417482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,10240,0.2685930569966634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,12288,0.31644268035888673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,16384,0.4194037437438965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,3072,0.09745279947916666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,3584,0.11087786356608073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,4096,0.1214410702387492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,2560,0.08454720179239908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,1536,0.0598090648651123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,5120,0.1464992046356201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,2048,0.07234666347503663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,128,0.027247999111811323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,1024,0.04710400104522705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,768,0.04177920023600261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,512,0.035571201642354326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,256,0.030433066685994464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,64,0.027135999997456868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,32,0.027319467067718504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,6144,0.13818346659342448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,7168,0.15692373911539714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,8192,0.17593599955240885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,10240,0.21211093266805015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,12288,0.2546453317006429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,16384,0.3348544120788574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,2560,0.06996906598409017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,3584,0.08959573109944662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,4096,0.09990613460540772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,3072,0.07976426283518473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,5120,0.11934506893157959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,1024,0.04077866474787394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,1536,0.05033813317616781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,512,0.03161279956499736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,2048,0.06012906630833944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,768,0.03617493311564128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,256,0.026532266537348432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,64,0.024573866526285806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,32,0.02456106742223104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,128,0.025629866123199462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,7168,0.13558079401652018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,8192,0.1511818726857503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,12288,0.21967360178629555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,10240,0.18689173062642414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,16384,0.2902400016784668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,4096,0.08635413646697998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,5120,0.10185173352559407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,3584,0.07852479616800943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,6144,0.11893866856892903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,3072,0.07000853220621744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,1536,0.0431818683942159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,2048,0.052142934004465735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,65536,1.7092287699381512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,2560,0.061009065310160315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,1024,0.03512959877649943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,768,0.030817067623138426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,128,0.020771199464797975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,512,0.027165865898132323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,64,0.02144533395767212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,256,0.02296746571858724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,32,0.021730132897694907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,65536,1.315130615234375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,7168,0.11465493043263753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,8192,0.12821439901987713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,10240,0.15634986559549968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,12288,0.18635093371073405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,16384,0.24608960151672363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,6144,0.1014677365620931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,3584,0.06727039813995361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,3072,0.059777065118153894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,4096,0.07387946446736654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,5120,0.08762453397115072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,2560,0.0525984009106954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,2048,0.04489599863688151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,1536,0.037028264999389646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,1024,0.030869332949320476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,768,0.02755519946416219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,256,0.020906666914621987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,512,0.024605866273244223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,128,0.019598933060963948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,32,0.019656533002853395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,64,0.019734400510787963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,65536,1.1539979298909506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,10240,0.15384960174560547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,8192,0.1264842669169108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,7168,0.11241280237833659
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,12288,0.1811242739359538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,16384,0.24079039891560874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,6144,0.10068799654642742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,4096,0.07260693709055582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,3584,0.0648362676302592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,5120,0.0859328031539917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,3072,0.05851946671803793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,2560,0.05111999909083048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,2048,0.04369920094807943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,1024,0.030292266607284547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,1536,0.03627946774164836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,256,0.02090453306833903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,768,0.027142399549484254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,512,0.023897600173950196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,128,0.019258666038513183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,32,0.019604265689849854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,64,0.019658666849136353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,65536,0.934935442606608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,8192,0.10903466542561849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,7168,0.09743253389994302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,12288,0.15411307017008463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,10240,0.13188906510670978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,16384,0.20376213391621908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,6144,0.08707839647928874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,2560,0.0442901333173116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,3072,0.0507370670636495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,5120,0.07555946509043375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,3584,0.05701653162638346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,4096,0.06329386631647746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,1536,0.032595199346542356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,2048,0.038475732008616134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,128,0.01875413258870443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,768,0.024949334065119424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,1024,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,256,0.01989013353983561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,512,0.022459733486175536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,32,0.01881600022315979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,64,0.019080533583958944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,65536,0.9088160196940104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,8192,0.09695253372192383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,10240,0.11595093409220378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,7168,0.08741866747538249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,12288,0.13639146486918133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,6144,0.07757866382598877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,16384,0.18099199930826823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,5120,0.06751999855041504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,4096,0.05677973429361979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,3584,0.05125866731007894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,3072,0.045132799943288164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,1536,0.030450133482615153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,2560,0.040039467811584475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,2048,0.035189334551493326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,256,0.019244800011316933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,1024,0.02593280076980591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,768,0.024158932765324912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,64,0.01825173298517863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,512,0.021563732624053956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,128,0.018057600657145182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,32,0.018346667289733887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,65536,0.808835220336914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,7168,0.07734933694203695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,8192,0.08538346290588379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,6144,0.0690720001856486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,10240,0.10167466799418132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,12288,0.11859626770019531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,3584,0.04424639940261841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,5120,0.05934079885482788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,16384,0.15788906415303547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,4096,0.04928853511810303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,2560,0.03549439907073974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,3072,0.03969599803288777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,2048,0.03177066644032796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,1536,0.027772800127665205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,1024,0.024126933018366496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,768,0.02104960083961487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,512,0.01992320020993551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,128,0.01741973360379537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,256,0.018114133675893148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,64,0.017214934031168617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,32,0.017434666554133095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,65536,0.6871967951456706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,8192,0.08072640101114908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,7168,0.07247253259023032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,6144,0.06352426608403525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,10240,0.09676480293273926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,12288,0.11364906628926594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,16384,0.1469696044921875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,5120,0.053998935222625735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,2560,0.03305919965108235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,4096,0.04501333236694336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,3072,0.03712960084279378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,3584,0.04030719995498657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,1024,0.02150933345158895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,2048,0.02913706700007121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,1536,0.02541866699854533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,768,0.020038400093714395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,512,0.01883626580238342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,256,0.017410133282343546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,128,0.017288533846537273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,64,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,32,0.01758613387743632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,65536,0.5876565297444661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,7168,0.06830933094024658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,10240,0.09094826380411783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,8192,0.07572267055511475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,6144,0.059894398848215735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,12288,0.10507733027140301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,16384,0.13698453903198243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,5120,0.0510538657506307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,4096,0.042721064885457356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,3584,0.038653866449991865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,3072,0.03543786605199178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,2560,0.03164586623509725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,2048,0.02874666651089986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,1024,0.02246613303820292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,1536,0.024551467100779215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,768,0.020358399550120036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,256,0.01736746629079183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,512,0.018966400623321535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,64,0.016985599199930826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,128,0.01662399967511495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,65536,0.5552384058634441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,32,0.016720000902811685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,6144,0.05538880030314127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,8192,0.07082346280415854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,7168,0.0638431986172994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,10240,0.08525226910909017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,12288,0.0979103962580363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,16384,0.12758613427480062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,3072,0.034251733620961504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,5120,0.04729173183441162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,4096,0.04008640050888061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,3584,0.03717759847640991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,2560,0.03097066680590312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,1536,0.024012800057729086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,2048,0.027618134021759035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,1024,0.021555199225743612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,768,0.020216532548268638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,512,0.019156267245610557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,256,0.017489065726598106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,128,0.01663040022055308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,65536,0.5128170649210613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,64,0.016581333676973977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,32,0.016702934106191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,6144,0.04778560002644856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,7168,0.05509973367055258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,10240,0.0769599994023641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,8192,0.06288640101750692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,12288,0.08931840260823568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,16384,0.11502506732940673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,5120,0.041834668318430586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,3584,0.03366719881693522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,4096,0.03588266770044963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,2560,0.028024532397588092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,3072,0.03094826738039653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,1024,0.020102399587631225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,2048,0.02560639977455139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,1536,0.022407466173172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,768,0.01843093236287435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,512,0.018038400014241538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,65536,0.4875626564025879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,256,0.016530133287111917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,128,0.016009599963823954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,64,0.016123732924461363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,32,0.016190933187802632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,6144,0.043081601460774735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,7168,0.049209598700205484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,8192,0.0560479998588562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,10240,0.06940586566925049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,12288,0.08138026396433512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,16384,0.10591999689737956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,3584,0.030503465731938677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,5120,0.0380789319674174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,4096,0.03324693242708842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,2560,0.025914667050043742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,3072,0.027947733799616497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,2048,0.023655466238657632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,1536,0.021286400159200032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,1024,0.019012266397476198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,65536,0.43442026774088544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,768,0.018246400356292724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,128,0.015752533078193666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,512,0.017197867234547935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,256,0.016189866264661155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,64,0.01585813363393148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,32,0.015707733233769734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,6144,0.039628799756368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,7168,0.04495466550191243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,8192,0.05032639900843302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,10240,0.06198826630910238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,12288,0.0742474635442098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,16384,0.09898666540781656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,4096,0.030333866675694782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,5120,0.035393067200978595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,3584,0.029257599512736005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,2048,0.022290132443110146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,3072,0.026705066363016765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,65536,0.3955552101135254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,2560,0.02490453322728475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,1536,0.020500266551971437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,1024,0.01861013372739156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,512,0.01704853375752767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,768,0.017529600858688356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,128,0.015043200055758158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,256,0.015974400440851848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,32,0.015569067001342774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,64,0.015475199619928996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,6144,0.03768426577250163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,8192,0.04741546710332235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,7168,0.042453332742055254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,10240,0.05798613230387369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,12288,0.06985066731770834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,16384,0.09509119987487794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,65536,0.34877440134684246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,3072,0.024940800666809083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,3584,0.027101866404215497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,5120,0.03367253144582112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,4096,0.02786453366279602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,2560,0.02333013415336609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,2048,0.021735467513402305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,768,0.017237333456675212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,1536,0.019732266664505005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,1024,0.017720532417297364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,256,0.015660799543062844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,512,0.01662826637427012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,128,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,64,0.01516480048497518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,32,0.01530346671740214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,6144,0.035427200794219973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,7168,0.03922239939371745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,8192,0.0435424009958903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,10240,0.0534112016359965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,12288,0.06428266763687134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,16384,0.09172266324361165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,5120,0.031268266836802165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,768,65536,0.32667414347330725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,4096,0.02775040070215861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,3584,0.025934932629267375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,2560,0.022871466477711995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,2048,0.020857600371042888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,3072,0.024714666604995727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,1536,0.01936533252398173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,1024,0.017753599087397258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,768,0.017092265685399375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,32,0.015218133727709452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,512,0.016386133432388306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,128,0.015211733182271323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,256,0.015448533495267234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,64,0.01518186628818512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,12288,0.062259201208750406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,8192,0.043068798383076985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,6144,0.034024532636006674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,7168,0.0369706670443217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,10240,0.05158079862594604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,16384,0.0840885321299235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,5120,0.03092479904492696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,512,65536,0.30225067138671874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,4096,0.027745066086451213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,3584,0.026332799593607587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,3072,0.02437973419825236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,1536,0.019474132855733236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,2560,0.022712532679239908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,2048,0.021444267034530638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,1024,0.017770665884017944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,128,0.014949333667755128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,768,0.01685653328895569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,512,0.01653439998626709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,256,0.015352533260981242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,32,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,64,0.014922666549682616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,7168,0.03720213174819946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,6144,0.03414719899495443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,10240,0.04781973361968994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,8192,0.03948160012563069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,12288,0.05721173286437988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,16384,0.0784661372502645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,256,65536,0.28273919423421223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,2048,0.02078826626141866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,1536,0.01885973413785299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,2560,0.0219541331132253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,3584,0.025655466318130492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,4096,0.026921600103378296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,3072,0.023829332987467446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,5120,0.03028480013211568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,1024,0.017577600479125977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,768,0.016761600971221924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,512,0.015887999534606935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,256,0.015293866395950317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,128,0.014993066589037577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,64,0.014619732896486918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,32,0.014963199694951376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,6144,0.03333866596221924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,8192,0.03927679856618245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,16384,0.07646613121032715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,10240,0.04578773180643718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,7168,0.03638399839401245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,12288,0.05473706722259521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,128,65536,0.2758581479390462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,3584,0.025245867172876996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,2048,0.020502400398254395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,3072,0.023779199520746867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,1536,0.01914560000101725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,2560,0.02248426675796509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,4096,0.02690773407618205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,5120,0.02994453310966492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,1024,0.01745706597963969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,768,0.016739199558893837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,256,0.015314132968584696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,512,0.016289066274960837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,128,0.014985600113868713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,64,0.014881066481272378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,32,0.015133866667747497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,6144,0.03318399985631307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,8192,0.0386186679204305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,10240,0.04456319808959961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,7168,0.03605333169301351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,12288,0.05384106636047363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,64,65536,0.27343358993530276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,16384,0.07575146357218424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,2048,0.02061226765314738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,3584,0.02518506646156311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,4096,0.02693013350168864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,5120,0.02976106603940328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,1536,0.01841920018196106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,2560,0.021911466121673585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,3072,0.023745065927505492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,1024,0.017246933778127034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,128,0.0146506667137146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,512,0.01583146651585897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,768,0.01657919983069102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,256,0.015227733055750528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,64,0.014553599556287131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,32,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1024,32,65536,0.27178560892740883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,4096,0.29845333099365234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,5120,0.36459414164225257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,6144,0.4347104072570801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,7168,0.5017130533854167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,8192,0.5737813313802083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,3584,0.2648128032684326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,3072,0.23115413983662925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,2048,0.16531306902567547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,2560,0.1990506649017334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,10240,0.7193450927734375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,1024,0.10084906419118245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,1536,0.13399359385172527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,64,0.051127465565999355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,768,0.08481706778208414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,256,0.055173333485921225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,128,0.05293866793314615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,32,0.050006401538848874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,512,0.06648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,12288,0.8634442647298177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,65536,16384,1.141115697224935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,7168,0.15025493303934734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,6144,0.13485867182413738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,8192,0.16951573689778646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,10240,0.20622612635294596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,12288,0.24154987335205078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,5120,0.11675519943237304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,3584,0.08725120226542155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,4096,0.09574933052062988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,3072,0.07697386741638183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,16384,0.3152885437011719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,2560,0.06752959887186685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,1536,0.04957013527552287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,2048,0.05837546586990357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,1024,0.03966720104217529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,768,0.035683198769887285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,256,0.026781866947809856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,64,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,512,0.031294933954874676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,128,0.025360000133514405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,32,0.024012800057729086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,7168,0.11860373020172119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,6144,0.10458239714304607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,8192,0.13269120057423908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,10240,0.16248106956481934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,12288,0.18964799245198566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,3072,0.06226986646652222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,16384,0.24513386090596517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,4096,0.07640213171641032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,3584,0.06898986498514811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,5120,0.09063573678334555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,2560,0.05435200134913126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,2048,0.04708906809488932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,768,0.029156267642974854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,1024,0.03226240078608195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,1536,0.039211734135945635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,512,0.025859200954437257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,128,0.020665599902470907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,256,0.022900267442067464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,64,0.021222400665283202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,32,0.021193599700927733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,7168,0.10661013126373291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,12288,0.17204052607218426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,8192,0.12010880311330159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,10240,0.1451818625132243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,16384,0.22302719751993813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,5120,0.08282026449839273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,4096,0.06965546607971192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,3584,0.06295573314030965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,6144,0.09429866472880045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,3072,0.056340265274047854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,2560,0.04951680103937785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,16384,65536,1.2329610188802085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,2048,0.04269760052363078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,1536,0.036355201403299967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,512,0.02649173339207967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,1024,0.029688533147176104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,768,0.027034666140874224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,256,0.023862399657567344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,32,0.023964800437291465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,128,0.023720532655715942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,64,0.02374826669692993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,7168,0.09189759890238444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,10240,0.12394879659016926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,8192,0.101909335454305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,12288,0.14316372871398925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,12288,65536,0.9773173650105795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,16384,0.18705387115478517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,3584,0.05397760073343912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,4096,0.05943893194198609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,5120,0.0714581330617269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,3072,0.048981332778930665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,6144,0.0816159963607788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,2560,0.04204053481419881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,1024,0.026738133033116656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,2048,0.03665706713994344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,1536,0.03202133377393086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,768,0.02450559933980306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,512,0.022223999102910362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,256,0.019525333245595296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,128,0.01851093371709188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,64,0.019141334295272826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,32,0.01926506757736206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,10240,65536,0.8541311899820963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,8192,0.09280640284220378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,10240,0.11250560283660889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,7168,0.08383039633433023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,12288,0.13135039806365967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,16384,0.16864959398905438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,4096,0.05486079851786295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,6144,0.07495253086090088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,3584,0.04907093445460002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,5120,0.06446293195088705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,3072,0.04388906558354695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,2048,0.03411200046539307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,2560,0.03896640141805013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,1536,0.029064534107844035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,1024,0.024846933285395303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,256,0.01964799960454305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,768,0.022805333137512207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,512,0.02060799996058146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,128,0.01864746610323588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,64,0.01902186671892802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,32,0.019181867440541588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,8192,65536,0.7204373041788737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,7168,0.07571093241373697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,8192,0.08366613388061524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,16384,0.15030186971028645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,6144,0.06716907024383545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,12288,0.11592000325520832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,10240,0.1002954641977946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,3584,0.04458560148874919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,3072,0.03990506728490194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,2560,0.035665067036946614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,4096,0.04929813146591187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,5120,0.05868159929911295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,2048,0.031864533821741745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,1536,0.027992532650629683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,64,0.017924267053604125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,1024,0.024089600642522177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,256,0.0186901330947876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,768,0.021356799205144248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,512,0.020173867543538414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,128,0.017809067169825235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,32,0.01791999936103821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,7168,65536,0.6610613505045573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,7168,0.07003839810689291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,6144,0.06276053190231323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,8192,0.07810346285502115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,10240,0.09239253203074137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,12288,0.10770026842753093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,5120,0.05463466644287109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,16384,0.13879359563191732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,4096,0.04593493143717448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,3584,0.04147200187047322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,2560,0.03365333477656047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,2048,0.0302784005800883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,3072,0.037595733006795244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,1536,0.02653546730677287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,1024,0.022734934091567995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,768,0.021082667509714763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,256,0.018053332964579262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,512,0.019694934288660683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,6144,65536,0.5758058547973632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,128,0.017234132687250773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,64,0.017322667439778647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,32,0.01698026657104492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,7168,0.06197866598765055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,6144,0.05398826599121094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,10240,0.08217386404673258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,8192,0.06853546301523844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,12288,0.09411413669586181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,16384,0.12006399631500245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,5120,0.04628159999847412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,3584,0.03640960057576497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,4096,0.03942933479944865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,2048,0.027178666989008587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,3072,0.03337493340174357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,2560,0.030025599400202434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,512,0.0186954657236735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,1024,0.020805333058039346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,768,0.01986879905064901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,1536,0.024277333418528238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,256,0.017382399241129557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,5120,65536,0.5205600102742513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,128,0.016752000649770102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,64,0.016820265849431356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,32,0.01690559983253479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,7168,0.05846079985300699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,8192,0.06488746802012126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,6144,0.05198506514231364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,10240,0.07685653368631998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,12288,0.08808746337890624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,16384,0.1120970646540324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,3584,0.034984532992045084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,5120,0.04451520045598348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,4096,0.03769280115763347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,3072,0.032408533493677776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,1536,0.02389226754506429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,2560,0.029601067304611206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,2048,0.026964267094930012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,1024,0.020914133389790854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,4096,65536,0.4532874743143718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,768,0.019876267512639365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,512,0.01885439952214559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,32,0.01723840037981669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,256,0.01769066651662191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,128,0.01735146641731262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,64,0.017298134167989095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,6144,0.047141333421071366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,8192,0.0608789324760437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,10240,0.07167893250783285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,7168,0.053607467810312906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,12288,0.08229333559672038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,16384,0.1037450631459554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,3584,0.03237973252932231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,4096,0.03541546662648519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,3072,0.030155734221140547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,5120,0.04037226835886638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3584,65536,0.4227456092834473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,2560,0.02725333372751872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,2048,0.025050665934880572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,1536,0.022048000494639078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,1024,0.01981546680132548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,768,0.01915839910507202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,512,0.018142932653427125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,32,0.016267733772595723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,256,0.016839466492335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,128,0.016265599926312765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,64,0.016407466928164163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,10240,0.06743893623352051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,8192,0.05615573326746622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,7168,0.04992533524831136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,12288,0.07769813537597656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,6144,0.04345173438390096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,16384,0.09714667002360025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,5120,0.03856639862060547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,4096,0.03355413277943929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,2560,0.026316799720128375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,3584,0.030932267506917317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,3072,0.028761599461237592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,1536,0.021781333287556968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,3072,65536,0.40094718933105467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,2048,0.02409706711769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,128,0.016105600198109946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,1024,0.019092265764872232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,768,0.018874667088190713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,512,0.01743146578470866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,256,0.01656426688035329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,64,0.015923200050989787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,32,0.015910399953524272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,6144,0.0400330662727356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,7168,0.04617919921875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,8192,0.05273493528366089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,10240,0.062142932415008546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,12288,0.07294507026672363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,16384,0.09468373457590738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2560,65536,0.37447999318440756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,5120,0.03472746610641479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,2560,0.025163733959198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,4096,0.030980267127354938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,3584,0.02876586715380351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,1536,0.020770132541656494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,3072,0.02706986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,2048,0.022702932357788086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,128,0.015793066223462424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,768,0.017805866400400796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,1024,0.01850559910138448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,256,0.016293332974116007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,512,0.01704746683438619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,32,0.015731199582417806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,64,0.015922133127848306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,6144,0.036507733662923175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,7168,0.04052693446477254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,8192,0.045303467909495035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,10240,0.05555626551310221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,12288,0.06549760103225707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,16384,0.08326186339060465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,2048,65536,0.33419841130574546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,4096,0.030904533465703328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,1536,0.02035520076751709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,5120,0.03453119993209839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,2048,0.022430932521820067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,3584,0.028407466411590577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,3072,0.027289599180221558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,2560,0.025300266345342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,768,0.017646932601928712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,1024,0.01844373345375061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,512,0.01691840092341105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,256,0.015910399953524272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,128,0.015568000078201295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,64,0.015686399737993875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,32,0.015518933534622192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,6144,0.03547413349151611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,7168,0.04026240110397339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,10240,0.05252159833908081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,16384,0.08490560054779053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,8192,0.0451477328936259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1536,65536,0.30306774775187173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,12288,0.06368533372879029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,2048,0.020473599433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,4096,0.026267733176549273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,5120,0.029811199506123858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,3072,0.02344533403714498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,3584,0.025831466913223265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,1536,0.018883200486501057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,2560,0.0216213325659434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,1024,0.017691733439763387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,768,0.017195733388264973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,256,0.01554026703039805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,64,0.015377066532770791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,512,0.016537599762280784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,128,0.015435733397801719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,32,0.015285332997639975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,1024,65536,0.2668501218159994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,6144,0.0321834663550059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,7168,0.03561919927597046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,10240,0.045507200558980304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,8192,0.038593065738677976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,12288,0.05430399974187215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,16384,0.07299413681030273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,5120,0.03129706581433614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,2560,0.02207039992014567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,3584,0.025297067562739056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,3072,0.024099200963974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,4096,0.027293866872787474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,2048,0.020507733027140297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,1024,0.017454934120178223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,1536,0.019061332941055296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,32,0.015191466609636942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,768,0.017014400164286295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,512,0.016302933295567833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,64,0.015130666891733804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,256,0.015525333086649575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,128,0.01544319987297058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,768,65536,0.25110079447428385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,6144,0.030593067407608032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,8192,0.03669120073318481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,7168,0.03404373327891032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,12288,0.05012586514155069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,10240,0.04297279914220174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,16384,0.07003413041432699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,5120,0.0279530664285024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,3584,0.023578667640686037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,4096,0.025191466013590496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,3072,0.022487467527389525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,1536,0.018259199460347493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,1024,0.017100799083709716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,2560,0.021195733547210695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,2048,0.0196234663327535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,64,0.014985600113868713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,768,0.016484266519546507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,512,0.016064000129699708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,128,0.01503679951032003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,256,0.0151829332113266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,32,0.014999467134475707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,8192,0.035675732294718425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,512,65536,0.23466879526774087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,10240,0.04154560168584188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,12288,0.04770026604334514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,6144,0.029808000723520918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,7168,0.03218560020128886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,16384,0.06276373465855917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,5120,0.02696533401807149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,1536,0.01796906590461731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,3072,0.021644800901412964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,3584,0.023082667589187623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,2048,0.01924053430557251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,4096,0.02467199961344401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,2560,0.02060799996058146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,1024,0.01694399913152059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,768,0.016642133394877114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,512,0.015862400333086647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,128,0.014822399616241455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,256,0.01525759994983673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,32,0.01481066644191742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,64,0.014883200327555338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,256,65536,0.2197322686513265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,16384,0.05786240100860596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,6144,0.03006933331489563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,7168,0.032630399862925215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,12288,0.044004265467325845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,8192,0.034430932998657224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,10240,0.0396448016166687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,5120,0.027077333132425947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,4096,0.0243776003519694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,2560,0.020146133502324422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,3584,0.023400533199310302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,2048,0.01927466591199239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,1024,0.016790399948755898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,768,0.016310399770736693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,3072,0.022040534019470214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,1536,0.018077866236368815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,32,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,512,0.015757866700490317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,64,0.014889599879582724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,128,0.014873600006103516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,256,0.015050666530927024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,128,65536,0.21255146662394203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,6144,0.029590400060017903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,7168,0.03166186610857646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,16384,0.055555200576782225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,8192,0.03350079854329427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,10240,0.0382261315981547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,12288,0.042650667826334636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,5120,0.026613332827885944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,1024,0.016764799753824867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,1536,0.017905066410700478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,2048,0.019038933515548705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,3072,0.021543467044830324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,2560,0.020387200514475505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,4096,0.024371200799942018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,3584,0.022937599817911783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,768,0.01625920037428538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,512,0.015425067146619162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,64,0.014661332964897156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,128,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,256,0.014966400464375815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,32,0.014734933773676554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,64,65536,0.21022613843282065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,12288,0.041278934478759764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,16384,0.05460053284962972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,6144,0.02959146698315938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,7168,0.03163626591364543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,8192,0.03332479993502299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,10240,0.03705600102742513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,5120,0.026712532838185626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,4096,0.024260266621907552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,3584,0.022714666525522866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,1024,0.016669867436091106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,2048,0.019015467166900633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,3072,0.021826134125391642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,2560,0.02005866765975952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,1536,0.01766506632169088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,768,0.016201600432395935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,512,0.015599999825159708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,256,0.015072000026702882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,128,0.014738133549690247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,64,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,32,0.014546133081118264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,768,32,65536,0.20916266441345216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,4096,0.20695892969767252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,5120,0.25654932657877605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,6144,0.299675718943278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,7168,0.3432095845540365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,8192,0.3875242551167806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,10240,0.481218147277832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,3072,0.16437546412150067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,2560,0.1416159947713216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,3584,0.18415360450744628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,2048,0.11853653589884441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,1024,0.07443626721700033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,1536,0.09583360354105631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,12288,0.5879306793212891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,256,0.04321386814117432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,64,0.0397546648979187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,512,0.051699201265970864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,128,0.0389354666074117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,768,0.06305493513743082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,32,0.04015359878540039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,6144,0.0913365364074707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,7168,0.10543253421783447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,8192,0.11685653527577718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,12288,0.1655669371287028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,10240,0.1416437307993571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,65536,16384,0.7751189549763997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,5120,0.07944746812184653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,3072,0.05486079851786295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,16384,0.21394133567810059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,4096,0.06735893090566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,3584,0.06115520000457764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,2560,0.048519468307495116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,2048,0.04253333409627279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,1024,0.030136533578236896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,768,0.026868265867233277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,1536,0.03588266770044963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,32,0.019950934251149497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,128,0.019824000199635823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,512,0.02418773372968038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,256,0.02135146657625834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,64,0.019820799430211387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,6144,0.07703893184661866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,7168,0.08697706858317057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,8192,0.09773120085398355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,12288,0.13621333440144856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,10240,0.11680853366851807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,16384,0.17508692741394044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,2560,0.04116693337758382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,3072,0.046223998069763184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,5120,0.06673279603322348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,4096,0.057017600536346434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,3584,0.05129599968592326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,1024,0.026655999819437663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,768,0.02444266676902771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,1536,0.03125866651535034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,2048,0.03614613215128581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,512,0.022261333465576173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,128,0.01841493248939514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,256,0.019342933098475137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,64,0.018915200233459474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,32,0.019132800896962485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,12288,0.11771626472473144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,10240,0.1018783966700236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,7168,0.07622719605763753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,8192,0.08527572949727377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,16384,0.15134399731953938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,6144,0.06762986977895101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,4096,0.05050346851348877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,5120,0.05935893456141154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,3584,0.046216531594594316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,16384,65536,0.8514581044514975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,3072,0.04208960135777791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,2560,0.03726079861323039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,768,0.023808000485102336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,2048,0.03295893271764119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,1536,0.028730666637420653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,1024,0.024932267268498738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,512,0.021244800090789794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,256,0.019045333067576088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,128,0.018157867590586345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,64,0.01834239959716797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,32,0.018256000677744546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,10240,0.08731839656829835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,12288,65536,0.6759562810262044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,7168,0.06542400121688843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,12288,0.10002666314442951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,8192,0.07248106797536215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,16384,0.12820053100585938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,3584,0.04043733278910319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,3072,0.03641706705093384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,4096,0.043722665309906004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,2560,0.032542934020360306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,6144,0.05871466795603434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,5120,0.052045865853627526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,2048,0.029182932774225873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,128,0.017083734273910522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,1536,0.02636373241742452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,512,0.019130667050679527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,768,0.021372799078623453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,1024,0.022743467489878336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,256,0.01804479956626892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,64,0.01738133430480957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,32,0.017538134256998697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,10240,65536,0.5693557103474934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,8192,0.06718186537424722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,6144,0.054324265321095785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,10240,0.08017280101776122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,7168,0.060515201091766356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,12288,0.09374399979909262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,16384,0.11805546283721924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,5120,0.046724267800649005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,4096,0.039985068639119464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,3584,0.036855467160542804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,3072,0.032985599835713704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,2560,0.02988160053888957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,1536,0.024149332443873087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,2048,0.0271616001923879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,128,0.017128533124923705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,1024,0.02086720069249471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,512,0.018322134017944337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,768,0.019514666001001994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,256,0.017206400632858276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,8192,65536,0.4838229179382324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,32,0.017356799046198527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,64,0.017245866854985557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,7168,0.056314667065938316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,6144,0.050374400615692136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,8192,0.06254826784133911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,10240,0.07416640122731527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,12288,0.08556479612986247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,5120,0.044402134418487546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,16384,0.10820586681365967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,3584,0.034392531712849936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,4096,0.037811199824015304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,2048,0.02635306715965271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,3072,0.0314410666624705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,2560,0.02906559904416402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,1536,0.024065067370732628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,512,0.018105600277582803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,1024,0.020637865861256918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,768,0.01895786722501119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,256,0.017113600174585977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,7168,65536,0.4476629257202148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,32,0.01669013301531474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,128,0.016509866714477538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,64,0.01676586667696635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,6144,0.046697600682576494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,8192,0.057505067189534506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,7168,0.05206613143285116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,10240,0.06827627023061117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,12288,0.07861973444620768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,16384,0.09822826385498047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,4096,0.03519573211669922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,5120,0.0408405343691508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,3072,0.030356266101201373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,3584,0.03275413314501445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,2560,0.0276853342851003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,768,0.019141334295272826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,1536,0.023412267367045082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,2048,0.025443200270334882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,1024,0.02072640061378479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,512,0.018425599733988444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,6144,65536,0.39626986185709634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,128,0.016702934106191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,256,0.01731520096460978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,64,0.0166293332974116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,32,0.0166293332974116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,7168,0.045015466213226316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,6144,0.03985706567764282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,8192,0.050154666105906166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,10240,0.06005013386408488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,12288,0.0679701328277588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,5120,0.03517760038375854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,16384,0.08540053367614746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,4096,0.03154346744219462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,2048,0.02318293253580729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,2560,0.02505386670430501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,3072,0.027076266209284466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,3584,0.02871893246968587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,5120,65536,0.37371412913004554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,768,0.018602667252222697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,1536,0.02061226765314738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,1024,0.01895573337872823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,256,0.016452266772588094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,512,0.017262933651606242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,128,0.01581546664237976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,32,0.016082132856051125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,64,0.016243199507395424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,7168,0.041621331373850504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,8192,0.046411732832590744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,6144,0.037138132254282634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,10240,0.05578240156173706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,12288,0.06496426661809286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,16384,0.07970346609751383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,5120,0.03338026603062948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,1536,0.019937066237131755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,3584,0.027703466018040974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,4096,0.029706666866938274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,4096,65536,0.31985066731770834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,2560,0.024099200963974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,2048,0.022129066785176597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,3072,0.025722666581471758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,1024,0.018742400407791137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,512,0.01678933302561442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,768,0.017825067043304443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,128,0.01594239970048269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,256,0.016500266393025716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,64,0.015761066476504007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,32,0.015904000401496886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,10240,0.053115733464558924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,8192,0.04324373404184977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,7168,0.038489600022633866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,6144,0.03511893351872762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,5120,0.03187519907951355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,12288,0.060312533378601076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,16384,0.07500800291697184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,3584,0.02677653431892395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,4096,0.02826133370399475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,3072,0.024680533011754355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3584,65536,0.28404267628987634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,2560,0.023178666830062866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,2048,0.020777599016825358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,128,0.015658666690190635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,1024,0.018078933159510292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,1536,0.018918399016062418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,512,0.016746666034062704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,768,0.017866667111714682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,256,0.016049066185951234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,32,0.01594986617565155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,64,0.015818666418393454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,7168,0.036304001013437906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,8192,0.0400000015894572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,12288,0.05589013497034708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,10240,0.047858134905497236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,6144,0.0329258660475413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,16384,0.07069013118743897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,3072,65536,0.27871786753336586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,5120,0.03025066653887431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,4096,0.026920533180236815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,3584,0.02521066665649414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,3072,0.023766400416692098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,2560,0.021590399742126464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,1024,0.017968000968297322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,1536,0.019078399737675986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,2048,0.01948480010032654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,768,0.017423999309539796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,64,0.015596800049146018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,512,0.016578132907549538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,256,0.01611840029557546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,128,0.015388799707094827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,32,0.01542080044746399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,6144,0.03186666568120321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,12288,0.05484586556752523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,8192,0.03957546552022298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,7168,0.03518400192260742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,10240,0.04643413225809733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,16384,0.07060800393422445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2560,65536,0.2575413386027018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,5120,0.02928000092506409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,4096,0.026521599292755126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,3584,0.02512960036595662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,3072,0.023271467288335165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,2560,0.0208512008190155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,1024,0.01763413349787394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,2048,0.019871999820073448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,1536,0.01854506731033325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,768,0.01693120002746582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,64,0.015383467078208923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,512,0.016476800044377647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,256,0.015748266379038492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,128,0.01535040040810903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,32,0.01532266636689504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,12288,0.049167998631795246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,7168,0.033155200878779094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,8192,0.03685226837793986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,2048,65536,0.2333397388458252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,10240,0.04190719922383626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,16384,0.06745279630025228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,6144,0.030039467414220172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,5120,0.02765760024388631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,2048,0.019230933984120686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,4096,0.0243231991926829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,1536,0.01802133321762085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,3584,0.024043732881546022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,3072,0.02093120018641154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,2560,0.020346667369206747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,768,0.016428800423940022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,1024,0.017043199141820273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,512,0.016105600198109946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,256,0.015397333105405173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,128,0.01498240033785502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,64,0.015194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,32,0.015196800231933594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1536,65536,0.2075551986694336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,16384,0.05980799992879232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,7168,0.030113067229588824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,6144,0.027808000644048054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,8192,0.03279680013656616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,12288,0.04110933144887288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,10240,0.03688746690750122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,4096,0.02215786576271057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,5120,0.02574293414751689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,1024,0.0168938676516215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,3584,0.021654399236043294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,3072,0.020703999201456706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,1536,0.017514665921529136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,2048,0.018515199422836304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,2560,0.01967786749204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,768,0.0163263996442159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,512,0.01578986644744873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,256,0.015293866395950317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,128,0.015124266346295675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,64,0.015083733201026916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,32,0.015140266219774882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,1024,65536,0.18343572616577147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,12288,0.04120106697082519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,8192,0.03201066652933757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,16384,0.05578986803690592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,10240,0.03593706687291463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,6144,0.026974932352701826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,7168,0.029433600107828778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,5120,0.024141865968704223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,4096,0.02251840035120646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,3584,0.02183039983113607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,2560,0.019219199816385903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,3072,0.020680532852808634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,1024,0.016781866550445557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,1536,0.01758613387743632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,2048,0.01848213275273641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,768,0.016587733229001363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,256,0.015072000026702882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,512,0.015836800138155617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,64,0.014903466900189719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,128,0.015027200182278952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,32,0.01483626663684845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,768,65536,0.1720479965209961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,16384,0.04832213322321574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,8192,0.03173440098762512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,6144,0.026852265993754072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,12288,0.03940586646397908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,10240,0.03595199982325236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,7168,0.028326400121053058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,5120,0.024692267179489136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,3584,0.021635200579961142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,4096,0.022657066583633423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,3072,0.02044586737950643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,2560,0.01934079925219218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,1536,0.017272533973058064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,2048,0.01875200072924296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,768,0.016264533003171287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,1024,0.016827734311421712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,512,0.0157642662525177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,256,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,128,0.014689067006111145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,64,0.014758400122324624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,65536,0.1619413375854492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,512,32,0.014838400483131408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,12288,0.03534613450368245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,16384,0.04296640157699585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,8192,0.029018666346867877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,10240,0.0324458658695221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,4096,0.022090667486190797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,6144,0.026318933566411334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,5120,0.024277333418528238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,7168,0.027677865823109944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,2048,0.018284799655278523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,3584,0.021196800470352172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,3072,0.019935999313990274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,2560,0.019001599152882895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,1536,0.017214934031168617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,768,0.01599679986635844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,1024,0.016761600971221924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,512,0.015370666980743408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,65536,0.15413014094034833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,256,0.015054933230082192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,128,0.014707199732462563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,64,0.014819199840227762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,256,32,0.014758400122324624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,12288,0.033725865681966144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,10240,0.031524266799290976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,16384,0.04162559906641643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,6144,0.026022400458653765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,7168,0.02770986755688985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,8192,0.02872213323911031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,5120,0.023916800816853843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,3584,0.021191465854644775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,4096,0.022104533513387044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,3072,0.0200053334236145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,1536,0.017077332735061644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,1024,0.016433067123095193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,2560,0.018994132677714028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,2048,0.017735467354456583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,768,0.01632426679134369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,512,0.015480533242225647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,256,0.01530026694138845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,65536,0.14824426968892415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,128,0.014578133821487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,64,0.01502293348312378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,128,32,0.014670933286348978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,12288,0.03314666748046875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,16384,0.040054400761922196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,7168,0.027369600534439088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,8192,0.028304000695546467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,10240,0.031194667021433514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,5120,0.023572266101837158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,6144,0.025945599873860675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,4096,0.02172373334566752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,3584,0.020935465892155967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,3072,0.019961599508921304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,2048,0.017944532632827758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,2560,0.018796799580256145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,768,0.015837867061297098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,1536,0.016936532656351724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,1024,0.016386133432388306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,512,0.01542080044746399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,65536,0.14636799494425456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,32,0.014472533265749613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,128,0.014599466323852539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,64,0.014642133315404256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,64,256,0.014891733725865683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,12288,0.03312746683756511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,16384,0.039654401938120525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,8192,0.02813653349876404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,10240,0.030722133318583172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,7168,0.027824000517527266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,6144,0.026021333535512288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,5120,0.023906133572260537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,4096,0.021900800863901775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,3072,0.019663999478022255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,3584,0.020961066087086998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,2560,0.01855573256810506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,2048,0.01786880095799764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,1536,0.016942934195200602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,1024,0.01649386684099833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,512,0.015319466590881348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,768,0.015629866719245912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,256,0.015086932977040609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,65536,0.14476799964904785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,128,0.014446933070818582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,64,0.014778666694959006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,512,32,32,0.014509866635004679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,4096,0.16463252703348796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,5120,0.19924052556355792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,6144,0.23751999537150065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,7168,0.2651040077209473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,8192,0.30120105743408204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,10240,0.3828127861022949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,3584,0.14742186864217122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,3072,0.13067946434020997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,2560,0.11365013122558594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,1536,0.07736426989237467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,12288,0.4441237449645996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,1024,0.06255786816279094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,2048,0.09488000075022379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,768,0.055676798025767006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,512,0.04800853331883748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,256,0.038363734881083175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,128,0.03553920189539592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,32,0.03571199973424276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,64,0.03582826852798462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,6144,0.07400960127512614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,7168,0.0846933364868164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,8192,0.09368320306142172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,10240,0.11437653700510661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,12288,0.1321237325668335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,65536,16384,0.6015829086303711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,4096,0.05553386608759562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,5120,0.06451626618703207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,16384,0.1701375961303711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,3584,0.05006080071131388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,1024,0.02646080056826274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,1536,0.031014400720596313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,3072,0.045083733399709065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,2048,0.03576426506042481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,768,0.024514132738113405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,512,0.022350933154424033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,2560,0.04012906551361084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,256,0.019722666343053183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,128,0.018819200992584228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,64,0.018811732530593872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,32,0.01916159987449646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,8192,0.07605226834615073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,7168,0.067958402633667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,6144,0.060210132598876955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,10240,0.08970560232798258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,12288,0.1042848030726115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,16384,0.13253119786580403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,3072,0.03784426848093669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,4096,0.04548159837722778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,2560,0.03445546627044678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,5120,0.05311893224716187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,3584,0.04135893185933431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,1536,0.027170133590698243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,2048,0.030423466364542646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,1024,0.02368639906247457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,128,0.01760853330294291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,512,0.02002453406651815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,768,0.022408533096313476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,256,0.018293333053588868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,64,0.017927465836207072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,32,0.01797119975090027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,7168,0.06152640183766683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,10240,0.08253440062204996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,8192,0.06846613089243571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,12288,0.09502186775207519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,16384,0.12193173567454021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,3072,0.035309867064158125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,4096,0.04176853497823079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,3584,0.038881067434946695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,6144,0.05479146639506022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,5120,0.048238933086395264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,2560,0.03171413342158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,2048,0.028755199909210206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,16384,65536,0.6450112024943034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,512,0.018541866540908815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,1024,0.022631466388702393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,1536,0.025755733251571655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,768,0.02127893368403117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,64,0.017242666085561117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,256,0.01803306738535563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,32,0.017106133699417114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,128,0.017242666085561117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,12288,65536,0.4937450726826985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,12288,0.08169493675231934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,8192,0.05886079867680868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,10240,0.069595734278361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,7168,0.05320533514022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,16384,0.1029205322265625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,6144,0.04795733292897542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,3584,0.03336319923400879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,5120,0.04235200087229411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,3072,0.030742400884628297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,4096,0.03612373272577922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,2560,0.027957334121068315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,2048,0.02579946716626485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,1024,0.020820266008377074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,1536,0.023628799120585124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,768,0.019432532787322997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,10240,65536,0.44437227249145506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,64,0.01683626572291056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,512,0.018155733744303383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,256,0.017443199952443443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,128,0.016531200210253397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,32,0.017011199394861856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,6144,0.044622933864593504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,12288,0.07529066403706869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,8192,0.05462719996770223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,7168,0.049865599473317465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,10240,0.06491626501083374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,16384,0.09386346340179444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,4096,0.03470613161722819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,2048,0.025473066171010333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,3072,0.029740800460179646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,3584,0.03219839930534363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,2560,0.027281065781911213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,5120,0.04001813332239787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,1536,0.023280000686645506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,1024,0.021466666460037233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,8192,65536,0.37092908223470056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,768,0.019730132818222047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,512,0.018668800592422485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,256,0.017683200041453042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,128,0.017179733514785765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,64,0.017322667439778647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,32,0.017460266749064125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,6144,0.040883199373881025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,7168,0.04597973426183065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,8192,0.04989333152770996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,10240,0.059443199634552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,12288,0.06771306991577149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,16384,0.08606080214182535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,5120,0.0365941325823466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,4096,0.03191360036532084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,3072,0.027565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,3584,0.029898667335510255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,2560,0.025378133853276568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,1024,0.018870399395624796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,1536,0.021894399325052896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,2048,0.023474133014678954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,128,0.016100266575813295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,768,0.018181333939234413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,512,0.01758079926172892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,64,0.01625920037428538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,256,0.016755199432373045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,7168,65536,0.3328714688618978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,32,0.016428800423940022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,6144,0.03852266470591227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,10240,0.054951465129852294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,7168,0.042556798458099364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,8192,0.046748801072438555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,12288,0.06219413280487061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,4096,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,16384,0.07884159882863363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,5120,0.03337386846542358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,2048,0.02281279961268107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,3584,0.028009599447250365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,3072,0.02609279950459798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,2560,0.024204800526301064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,6144,65536,0.3180661201477051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,1536,0.020125865936279297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,768,0.01809813380241394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,1024,0.01885333259900411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,512,0.017016534010569254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,256,0.01650879979133606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,128,0.01599253316720327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,64,0.015357866883277893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,32,0.015845333536465965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,7168,0.03991359869639079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,6144,0.03569066524505615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,8192,0.04518186648686727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,16384,0.07950080235799153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,10240,0.05183360179265341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,12288,0.05969280004501343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,5120,0.03009706735610962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,5120,65536,0.29095681508382165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,3584,0.0255295991897583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,4096,0.027027199665705364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,3072,0.024089600642522177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,2048,0.02111146648724874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,2560,0.022394667069117226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,1024,0.01825066606203715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,1536,0.019633066654205323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,768,0.017461333672205606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,512,0.016582399606704712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,32,0.0160671999057134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,256,0.016099199652671814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,128,0.015736533204714458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,64,0.0158869336048762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,6144,0.032840534051259355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,7168,0.036152533690134686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,10240,0.04787946542104085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,8192,0.04050240119298299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,4096,65536,0.24758613904317223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,12288,0.05415786504745483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,16384,0.06678826808929443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,3584,0.026474666595458985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,5120,0.03086079955101013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,4096,0.028198399146397907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,3072,0.024734934171040855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,2560,0.022875734170277915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,1536,0.019172267119089762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,1024,0.018313600619633993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,2048,0.021291732788085938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,768,0.017745065689086913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,128,0.01564800043900808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,512,0.01690773367881775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,256,0.016189866264661155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,32,0.016108799974123636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,64,0.01583573321501414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,7168,0.03370453516642253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,6144,0.030808534224828082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,8192,0.037178667386372884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,10240,0.04416746695836385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,12288,0.05050986607869466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,16384,0.06279999812444051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,5120,0.02997973362604777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3584,65536,0.23429439862569174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,4096,0.026794666051864625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,3584,0.025681066513061523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,1536,0.01909653345743815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,3072,0.023829332987467446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,2560,0.021682133277257286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,2048,0.02076479991277059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,1024,0.017833600441614784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,768,0.01716053287188212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,256,0.01588053305943807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,512,0.01634880006313324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,64,0.015370666980743408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,128,0.01537493367989858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,32,0.015501866738001505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,8192,0.03509013255437215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,12288,0.047975468635559085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,6144,0.029282132784525555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,10240,0.041895465056101484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,7168,0.03161919911702474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,3072,65536,0.21599359512329103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,16384,0.05970773299535116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,5120,0.0265066663424174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,4096,0.02429973284403483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,3584,0.023118933041890465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,2560,0.020666666825612388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,3072,0.022028799851735434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,2048,0.018706132968266807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,1536,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,1024,0.017451733350753784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,768,0.016809600591659545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,512,0.01635840038458506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,256,0.015686399737993875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,128,0.015154133240381876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,64,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,32,0.015237333377202353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,16384,0.06164266665776571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2560,65536,0.2006389300028483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,6144,0.0294378658135732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,8192,0.0352565328280131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,10240,0.040686933199564616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,7168,0.03225173354148865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,12288,0.048135467370351154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,5120,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,1024,0.017110399405161538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,4096,0.022911999622980753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,3584,0.02193066676457723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,3072,0.02026346723238627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,2560,0.019349332650502524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,2048,0.018194133043289186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,1536,0.01772480010986328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,768,0.016676266988118492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,128,0.015086932977040609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,512,0.0161461333433787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,256,0.015801599621772765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,64,0.01523413360118866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,32,0.015228799978892007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,2048,65536,0.17851840655008952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,16384,0.05466026862462362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,5120,0.02563520073890686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,8192,0.03149440089861552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,6144,0.026565333207448322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,7168,0.02897599935531616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,10240,0.03583039840062459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,12288,0.04040106534957886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,3584,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,4096,0.023437867561976113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,3072,0.02026453415552775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,2560,0.019139200448989868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,2048,0.018332799275716148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,1536,0.017828265825907387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,1024,0.016804265975952148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,768,0.016481066743532814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,512,0.015793066223462424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,256,0.015727999806404113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,128,0.01498133341471354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,65536,0.15945706367492676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,64,0.0152319997549057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1536,32,0.015361066659291586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,12288,0.03649493455886841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,16384,0.05099093516667684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,8192,0.029128533601760865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,6144,0.025436800718307496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,5120,0.023894399404525757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,7168,0.02717546621958415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,10240,0.032869333028793336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,4096,0.021924267212549843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,3584,0.02044693430264791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,3072,0.019435733556747437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,768,0.016130133469899496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,2560,0.018569600582122803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,2048,0.018028799692789713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,1024,0.016752000649770102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,1536,0.017151999473571777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,512,0.015677866339683533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,65536,0.14529813130696614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,128,0.01490133305390676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,256,0.015214932958285013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,64,0.01495039959748586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,1024,32,0.015046399831771851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,12288,0.037164799372355145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,16384,0.04484800100326538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,8192,0.031011199951171874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,10240,0.03378880023956299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,7168,0.026174932718276978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,5120,0.023357866207758586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,6144,0.025081600745519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,3072,0.019540266195933024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,4096,0.021077332894007365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,3584,0.02034133275349935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,2560,0.018551466862360637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,768,0.01611733337243398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,2048,0.017749333381652833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,1024,0.01670080025990804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,1536,0.01731626590092977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,512,0.01567466656366984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,65536,0.13615040779113768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,64,0.01495253344376882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,256,0.01532906691233317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,128,0.014941866199175516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,768,32,0.014823466539382935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,8192,0.027254400650660197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,12288,0.03474453290303548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,16384,0.04099733432133992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,7168,0.025914667050043742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,10240,0.03139306704203288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,5120,0.022281599044799805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,6144,0.024409600098927817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,4096,0.02081813414891561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,3584,0.02004800041516622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,3072,0.01919999917348226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,2560,0.01814613342285156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,2048,0.017831466595331826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,1536,0.01695786714553833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,768,0.015734400351842245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,1024,0.01637226641178131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,512,0.01534293293952942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,65536,0.12917013168334962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,256,0.0150709331035614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,128,0.014738133549690247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,64,0.01477013329664866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,512,32,0.014622933665911355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,12288,0.031345067421595256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,16384,0.03631573518117269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,10240,0.02903573314348857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,8192,0.02738986611366272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,4096,0.020846933126449585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,6144,0.024370133876800537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,7168,0.02579946716626485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,5120,0.021963733434677123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,2048,0.01755519906679789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,3584,0.019658666849136353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,3072,0.018889600038528444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,2560,0.018186666568120322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,65536,0.12059199810028076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,1536,0.01702186663945516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,768,0.015579733252525329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,1024,0.016241066654523215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,512,0.01535040040810903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,256,0.014843733112017313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,32,0.014829867084821067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,128,0.014777599771817525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,256,64,0.014713600277900696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,7168,0.025473066171010333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,10240,0.02799359957377116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,16384,0.03533226648966471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,12288,0.02969706654548645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,8192,0.025916800896326704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,5120,0.022363734245300294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,6144,0.023989333709081014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,4096,0.02074133356412252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,3584,0.019377066691716512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,3072,0.018631466229756675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,2560,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,2048,0.017142399152119955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,65536,0.11636586983998616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,1536,0.016999467213948568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,1024,0.01600213348865509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,768,0.015717333555221556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,512,0.015373866756757101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,128,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,256,0.014709333578745524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,64,0.014510933558146158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,128,32,0.014640000462532044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,12288,0.02976106603940328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,16384,0.03404693206151326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,10240,0.027884799242019653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,5120,0.022021333376566567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,6144,0.024081067244211832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,7168,0.024936532974243163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,8192,0.025843199094136553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,3072,0.01858453353246053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,4096,0.02059946656227112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,3584,0.01939520041147868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,768,0.015711999932924905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,2560,0.017810134092966716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,2048,0.01716586748758952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,1536,0.016482133666674295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,1024,0.016139733791351318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,65536,0.11446506977081299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,512,0.014888532956441245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,256,0.014929067095120749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,128,0.014661332964897156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,64,0.014663466811180114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,64,32,0.014533332983652749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,10240,0.028090665737787884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,12288,0.029364265998204547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,7168,0.02484906713167826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,16384,0.03345599969228109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,8192,0.02567039926846822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,6144,0.023938133319218954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,5120,0.022089600563049316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,3072,0.01874986688296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,4096,0.020499199628829956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,3584,0.019603200753529868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,2560,0.017806933323542277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,2048,0.01718506614367167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,768,0.015524267156918844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,1536,0.01686613361040751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,1024,0.01620266636212667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,65536,0.11363840103149414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,512,0.01532266636689504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,256,0.01508799990018209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,128,0.014913066228230795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,32,0.014403200149536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,384,32,64,0.014473600188891092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,5120,0.14053972562154132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,4096,0.11670400301615398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,6144,0.1679189364115397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,7168,0.18951786359151204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,8192,0.2162602742513021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,3584,0.10595306555430095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,3072,0.09370559851328532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,10240,0.2669610659281413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,1536,0.05872213443120321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,2048,0.0700650691986084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,2560,0.08274986743927001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,256,0.031702399253845215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,768,0.04247359832127889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,1024,0.04749333461125692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,128,0.027710932493209838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,64,0.028288000822067262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,32,0.03133973280588786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,512,0.03826773166656494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,12288,0.3100138664245605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,8192,0.06832533677419027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,7168,0.06090453465779623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,6144,0.05637439886728922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,10240,0.08204800287882487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,12288,0.09544959863026937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,65536,16384,0.4064789454142253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,16384,0.12088212966918946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,4096,0.041623465220133465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,5120,0.04809600114822388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,3584,0.03824106852213542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,1536,0.02600853244463603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,3072,0.03517546653747559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,2560,0.03236053387324016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,2048,0.029740800460179646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,768,0.021741867065429688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,1024,0.02312320073445638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,512,0.020191999276479085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,256,0.017539199193318686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,128,0.017008000612258913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,64,0.017605332533518474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,32,0.017810134092966716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,10240,0.06690879662831625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,7168,0.05067946513493856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,6144,0.04552213350931804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,8192,0.056209067503611244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,12288,0.07747946580251058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,16384,0.10005866686503093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,5120,0.04084159930547078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,3584,0.033266133069992064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,2560,0.027982934315999346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,3072,0.030305065711339313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,4096,0.035792001088460285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,1536,0.02379946708679199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,2048,0.025571199258168538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,128,0.01646613379319509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,1024,0.021267199516296388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,768,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,256,0.01736533244450887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,512,0.01780479947725932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,32,0.01725013256072998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,64,0.016685867309570314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,8192,0.051668266455332436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,7168,0.04663893381754557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,10240,0.060946134726206455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,12288,0.07010773022969564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,16384,0.08857493400573731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,6144,0.042555733521779375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,4096,0.033675734202067056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,5120,0.03772799968719483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,2560,0.027163734038670857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,3072,0.02927253246307373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,3584,0.03188266754150391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,16384,65536,0.43569494883219406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,1024,0.02114560008049011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,2048,0.025217066208521526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,1536,0.022988800207773843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,768,0.019795199235280357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,512,0.018131200472513834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,256,0.016446933150291443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,128,0.016502400239308677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,64,0.016684800386428833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,12288,65536,0.3600693384806315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,32,0.016504533092180886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,10240,0.05206293265024821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,7168,0.042556798458099364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,12288,0.05948586861292521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,6144,0.036954665184021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,8192,0.04471679925918579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,16384,0.07567146619160971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,4096,0.029703466097513835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,3584,0.02751893401145935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,3072,0.0255840003490448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,5120,0.03330666621526082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,2560,0.023785599072774253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,2048,0.022539732853571574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,1536,0.02115946610768636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,512,0.017230933904647826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,1024,0.019193599621454872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,768,0.017241599162419637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,10240,65536,0.321836789449056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,256,0.016309332847595216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,64,0.016173866391181946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,128,0.01632213294506073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,32,0.016476800044377647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,6144,0.034653866291046144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,8192,0.04205866654713948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,7168,0.03819626569747925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,10240,0.047907201449076335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,12288,0.05460799932479858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,16384,0.06942506631215414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,4096,0.027811199426651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,3584,0.026077866554260254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,5120,0.031224532922108965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,3072,0.024380799134572348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,1536,0.020435200134913126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,2560,0.023085866371790567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,2048,0.02163413365681966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,1024,0.018547199169794717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,768,0.017181867361068727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,256,0.01646933356920878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,8192,65536,0.26838293075561526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,128,0.015811199943224587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,512,0.016897066434224447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,64,0.01590933303038279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,32,0.01627306640148163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,16384,0.06696853637695313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,8192,0.039435732364654544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,7168,0.03609706560770671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,10240,0.04516799847284953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,12288,0.051831467946370446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,6144,0.032789333661397295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,5120,0.02935360074043274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,3072,0.023431466023127238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,2048,0.020974934101104736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,3584,0.02480213244756063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,4096,0.02597973346710205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,2560,0.022284799814224245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,1536,0.01959999998410543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,7168,65536,0.23376107215881348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,64,0.01597653329372406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,768,0.01727679967880249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,1024,0.017578667402267455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,512,0.01660053332646688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,256,0.01599253316720327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,128,0.0157258669535319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,32,0.016081066926320393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,6144,0.030406399567921953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,8192,0.03855679829915364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,7168,0.03559253215789795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,10240,0.04380799929300944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,12288,0.04730986754099528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,16384,0.06543999910354614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,6144,65536,0.22409920692443847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,5120,0.02835200031598409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,4096,0.025035732984542848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,3584,0.02466239929199219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,3072,0.022740266720453896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,1536,0.018875734011332194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,2560,0.02123946746190389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,1024,0.017518933614095053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,2048,0.020427733659744263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,128,0.015542399883270264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,512,0.01648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,768,0.0169813334941864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,256,0.01565226713816325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,64,0.01548906664053599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,32,0.015650133291880287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,7168,0.031750400861104325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,16384,0.056934400399525964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,6144,0.028637866179148357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,8192,0.03559039831161499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,10240,0.04033493200937907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,12288,0.04486506779988607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,5120,65536,0.20478827158610025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,4096,0.024043732881546022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,5120,0.02603413263956706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,1536,0.01754986643791199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,3584,0.02296533385912577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,3072,0.021622399489084877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,2560,0.020654932657877604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,2048,0.019348265727361043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,512,0.016106667121251424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,1024,0.016935465733210246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,768,0.01664426624774933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,256,0.0155349334081014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,128,0.01498133341471354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,32,0.015438933173815408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,64,0.015459199746449789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,8192,0.03197866678237915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,7168,0.029422932863235475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,6144,0.027318400144577027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,16384,0.05565333366394043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,4096,65536,0.17744000752766925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,12288,0.04312106768290202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,10240,0.037834668159484865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,3072,0.02137813369433085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,5120,0.025204267104466754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,4096,0.023277866840362548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,3584,0.022051199277242025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,2560,0.02005973259607951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,2048,0.018722132841746012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,1024,0.016707199811935424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,1536,0.017498666048049928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,32,0.015474133690198264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,768,0.016216533382733662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,512,0.01585493286450704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,256,0.015307733416557312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,128,0.015269333124160766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,64,0.015227733055750528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3584,65536,0.16381440162658692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,6144,0.026150399446487428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,7168,0.02858240008354187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,8192,0.03161173264185588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,10240,0.03612373272577922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,12288,0.04102506637573242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,16384,0.05208746592203776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,5120,0.024810665845870973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,2560,0.01943146586418152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,3584,0.021626667181650797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,1024,0.01658453345298767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,4096,0.022435200214385987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,3072,0.020465066035588585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,1536,0.017458132902781167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,2048,0.017912532885869345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,256,0.015280000368754067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,768,0.01601920028527578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,512,0.01598186691602071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,32,0.015451733271280924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,128,0.015085867047309876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,64,0.015010133385658264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,3072,65536,0.14933652877807618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,8192,0.02899199922879537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,7168,0.02699626684188843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,12288,0.042078932126363114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,10240,0.0327839990456899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,6144,0.02505386670430501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,16384,0.05209600130716959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,4096,0.021600000063578286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,5120,0.023825067281723022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,1536,0.017299199104309083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,3584,0.02104640007019043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,3072,0.019832533597946168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,2560,0.018257067600886027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,2048,0.017732266585032144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,1024,0.016291200121243795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,512,0.015586133797963461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,768,0.0160480002562205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,256,0.01527253290017446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,128,0.014995200435320535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,64,0.015020799636840821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,32,0.015083733201026916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2560,65536,0.13980159759521485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,16384,0.04760853449503581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,12288,0.033532798290252686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,6144,0.023703465859095253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,7168,0.02582719922065735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,8192,0.027574400107065838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,10240,0.029919999837875366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,4096,0.020709333817164104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,5120,0.02230613430341085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,3584,0.019850667317708334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,1024,0.016383999586105348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,3072,0.01912426749865214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,2560,0.01792853275934855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,2048,0.017101866006851197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,1536,0.01718506614367167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,768,0.01573013365268707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,256,0.015384533007939658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,512,0.015635200341542563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,65536,0.12432746887207032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,128,0.015068800250689188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,64,0.015051733454068503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,2048,32,0.015119999647140503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,8192,0.026845866441726686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,12288,0.033111466964085894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,16384,0.04408213297526042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,10240,0.029727999369303388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,5120,0.022010666131973267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,6144,0.023757867018381753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,7168,0.025227733453114826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,4096,0.020317866404851278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,3584,0.019891200462977092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,3072,0.01885333259900411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,2560,0.01834133267402649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,2048,0.017448532581329345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,1536,0.017146666844685875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,1024,0.01628266672293345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,512,0.015434666474660238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,768,0.015963733196258545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,65536,0.11350506941477459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,256,0.014854400356610616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,128,0.014906666676203408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,64,0.014939733346303306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1536,32,0.0152319997549057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,16384,0.03845333258310954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,12288,0.03163413405418396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,10240,0.029167999823888142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,8192,0.027036799987157183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,7168,0.024601600567499795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,5120,0.02101226647694906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,6144,0.02265920042991638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,4096,0.019926400979359944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,3584,0.019217065970102944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,3072,0.01867093245188395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,2560,0.018091734250386557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,768,0.015461333592732749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,512,0.015446399648984274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,1024,0.016309332847595216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,2048,0.017655466000239053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,1536,0.01731946667035421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,65536,0.10198933283487957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,256,0.014882133404413859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,128,0.014523733655611673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,32,0.014758400122324624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,1024,64,0.014879999558130899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,7168,0.02379306753476461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,8192,0.025634133815765382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,12288,0.031014400720596313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,16384,0.03427626689275105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,6144,0.022541866699854533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,10240,0.02844799955685933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,5120,0.021552000443140665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,2048,0.01727679967880249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,4096,0.0196341335773468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,1024,0.01606186628341675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,3584,0.019050665696461997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,3072,0.018339200814565023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,2560,0.01765973369280497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,1536,0.016539733608563742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,65536,0.09579199949900309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,768,0.01569919983545939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,256,0.01514346698919932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,512,0.015450666348139444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,64,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,128,0.01495680014292399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,768,32,0.014791466792424521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,10240,0.026680533091227216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,12288,0.02693439920743306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,8192,0.024844799439112344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,16384,0.031653332710266116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,7168,0.023882667223612465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,6144,0.02257919907569885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,5120,0.020812799533208214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,4096,0.01983573238054911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,3584,0.019010132551193236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,3072,0.01834026575088501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,2048,0.01737920045852661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,2560,0.018091734250386557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,65536,0.09224212964375814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,1536,0.016547200083732606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,768,0.015506133437156677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,1024,0.015848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,512,0.01523413360118866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,128,0.014942933122316995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,256,0.014713600277900696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,64,0.014654933412869772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,512,32,0.014672000209490457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,10240,0.02523733377456665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,16384,0.030195200443267824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,12288,0.02694293260574341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,8192,0.023707733551661173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,7168,0.023041067520777385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,6144,0.022300799687703453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,4096,0.019399466117223103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,5120,0.020707199970881142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,3584,0.01880000034968058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,65536,0.08573439915974936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,3072,0.018202666441599527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,2560,0.01770346760749817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,1024,0.015733333428700765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,2048,0.01669973333676656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,1536,0.016427733500798545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,512,0.01520746648311615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,768,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,32,0.014749866724014283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,256,0.01479039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,64,0.014801067113876343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,256,128,0.014636799693107605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,12288,0.025960532824198405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,16384,0.02886506716410319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,8192,0.023194666703542074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,10240,0.024954666694005333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,6144,0.022139734029769896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,5120,0.020528000593185425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,7168,0.02309439977010091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,3072,0.018212266763051353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,4096,0.019375999768575035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,3584,0.018387200435002644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,65536,0.08153066635131836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,2560,0.01732586622238159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,768,0.015301332871119181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,1536,0.016313599546750386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,2048,0.017010132471720375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,1024,0.015675733486811318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,512,0.015051733454068503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,256,0.014754133423169455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,128,0.014401066303253173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,64,0.01443839967250824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,128,32,0.014542933305104574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,12288,0.025836799542109174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,10240,0.024949334065119424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,16384,0.02876480023066203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,5120,0.02031466762224833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,8192,0.023400533199310302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,7168,0.022959999243418374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,6144,0.02208426594734192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,2560,0.01737706661224365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,3584,0.018372267484664917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,4096,0.019143466154734293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,65536,0.07917439937591553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,3072,0.01788053313891093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,2048,0.016673066218694053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,1536,0.016103466351826988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,1024,0.01567146678765615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,768,0.015375999609629312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,512,0.015165866414705912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,256,0.01472106675306956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,64,0.014551466703414917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,128,0.014546133081118264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,64,32,0.01453439990679423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,12288,0.025604265928268432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,10240,0.024937599897384644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,16384,0.028615466753641766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,8192,0.0233024001121521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,7168,0.02314880092938741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,6144,0.021767467260360718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,5120,0.020472532510757445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,3584,0.018497065703074137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,4096,0.019374932845433554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,65536,0.07720639705657958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,2560,0.017514665921529136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,3072,0.017899733781814576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,2048,0.017066667477289833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,768,0.015341867009798685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,1536,0.016132266322771708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,512,0.01527679959932963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,1024,0.01581546664237976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,128,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,256,0.014523733655611673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,64,0.014401066303253173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,256,32,32,0.014652799566586813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,4096,0.10509440104166667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,5120,0.12539093494415282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,6144,0.14407679239908855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,7168,0.16189866065979003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,8192,0.18250346183776855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,3584,0.09503359794616699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,10240,0.22049493789672853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,3072,0.08511573473612467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,768,0.039847465356191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,2560,0.07327786286671957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,1536,0.053199998537699376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,2048,0.062490665912628175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,1024,0.04334933360417684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,256,0.028112000226974486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,512,0.03298346598943074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,128,0.02532800038655599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,64,0.02555626630783081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,32,0.02749546567598979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,12288,0.2581258614857992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,65536,16384,0.3473696072896322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,6144,0.05168639818827311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,7168,0.0538101315498352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,8192,0.06027093331019083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,10240,0.07206400235493979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,12288,0.08477439880371093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,16384,0.10453226566314697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,5120,0.0458624005317688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,1536,0.02381226619084676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,3584,0.035572266578674315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,3072,0.033090132474899295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,4096,0.03851519823074341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,2560,0.029949865738550824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,2048,0.02635519901911418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,768,0.02032426595687866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,1024,0.021290665864944457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,256,0.016549332936604818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,512,0.019001599152882895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,128,0.016450132926305136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,64,0.016616533199946083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,32,0.01700053413709005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,8192,0.049906134605407715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,6144,0.041169067223866776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,7168,0.04547306696573893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,10240,0.05778239965438843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,12288,0.06683306694030762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,16384,0.0862709363301595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,2048,0.024062933524449666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,2560,0.02576746741930644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,4096,0.03303359945615132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,3584,0.03067413369814555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,5120,0.0368938684463501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,3072,0.02837226589520772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,1536,0.02180373271306356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,1024,0.020014933745066323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,64,0.016537599762280784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,768,0.019180800517400107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,512,0.017280000448226928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,128,0.016199466586112977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,256,0.01648640036582947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,32,0.01681813398996989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,8192,0.04929706652959188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,10240,0.05758399963378906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,7168,0.044915199279785156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,12288,0.0688693364461263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,16384,0.08548906644185385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,16384,65536,0.3617397308349609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,6144,0.04065066576004028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,4096,0.032525867223739624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,5120,0.03663253386815389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,3584,0.030586665868759154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,3072,0.027911466360092164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,2560,0.02613439957300822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,1536,0.02183786630630493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,2048,0.023690666755040488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,1024,0.01972800095876058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,768,0.018683733542760213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,512,0.017043199141820273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,12288,65536,0.2914698600769043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,256,0.016747732957204185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,32,0.016327466567357382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,128,0.01593706707159678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,64,0.015994667013486227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,12288,0.057061334451039634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,6144,0.03498560190200806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,10240,0.04827839930852254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,8192,0.04291839996973674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,7168,0.03880106608072917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,16384,0.07105493545532227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,5120,0.030139732360839843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,3072,0.023592533667882283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,2560,0.022728532552719116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,3584,0.02542933424313863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,4096,0.02674986720085144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,1536,0.019874133666356406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,2048,0.021067732572555543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,10240,65536,0.2706240018208822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,1024,0.01835306684176127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,768,0.016736000776290894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,512,0.016770132382710776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,128,0.01555519998073578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,256,0.016084266702334087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,32,0.015579733252525329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,64,0.015796266992886863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,7168,0.038540800412495926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,6144,0.034082134564717606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,10240,0.04642346700032552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,8192,0.04263360102971395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,12288,0.05406506856282552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,16384,0.0722389300664266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,3584,0.026053333282470705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,4096,0.02752106587092082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,5120,0.030972800652186078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,3072,0.024171733856201173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,2560,0.022999467452367146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,1536,0.020139733950297035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,8192,65536,0.21969920794169107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,2048,0.02139520049095154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,768,0.017520000537236534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,1024,0.01874133348464966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,128,0.01566933294137319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,512,0.017044266064961754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,256,0.01646613379319509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,32,0.016017066438992818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,64,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,12288,0.051742935180664064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,6144,0.033019733428955075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,7168,0.036534400780995686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,8192,0.03983680009841919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,10240,0.04593600034713745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,5120,0.02909440000851949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,16384,0.06957440376281739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,2048,0.020528000593185425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,3584,0.02462079922358195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,3072,0.02348053256670634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,4096,0.026284799973169966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,2560,0.0221781333287557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,1024,0.017192532618840538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,7168,65536,0.20513812700907388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,1536,0.01917866667111715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,768,0.01662399967511495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,512,0.016499200463294984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,128,0.01535360018412272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,64,0.01546346644560496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,256,0.01593386630217234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,32,0.015500799814860026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,8192,0.03542933464050293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,6144,0.02887360056241353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,7168,0.032586665948232015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,10240,0.040174933274586995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,12288,0.04501226743062337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,5120,0.026579199234644572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,16384,0.0560426672299703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,6144,65536,0.18218986193339032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,4096,0.02432533303896586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,2048,0.0200053334236145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,3584,0.023271467288335165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,3072,0.022230400641759237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,2560,0.021022933721542358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,1536,0.018793600797653198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,1024,0.01728746692339579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,768,0.01602453291416168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,512,0.016293332974116007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,256,0.0155157337586085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,128,0.01514240006605784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,64,0.015402666727701821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,32,0.01535146633783976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,12288,0.04486613273620606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,7168,0.030850134293238324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,6144,0.02834666570027669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,16384,0.058609068393707275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,10240,0.03946133454640706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,8192,0.03499946594238281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,5120,0.024563199281692503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,5120,65536,0.1651327927907308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,1536,0.017143466075261436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,4096,0.022616533438364665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,3584,0.022207999229431154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,3072,0.02092693249384562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,2560,0.019716266791025797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,2048,0.018755199511845906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,768,0.016371200482050575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,1024,0.016479999820391337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,256,0.015570132931073507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,512,0.016178133090337117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,128,0.015218133727709452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,64,0.01520853340625763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,32,0.015482667088508605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,4096,65536,0.14327680269877116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,6144,0.02683839996655782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,8192,0.030971733729044597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,7168,0.02922240098317464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,16384,0.05066773494084677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,5120,0.024961066246032716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,12288,0.04118293523788452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,10240,0.036584532260894774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,2048,0.01789120038350423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,4096,0.022770132621129355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,2560,0.019778132438659668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,3584,0.02204373280207316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,3072,0.020780799786249797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,1536,0.017618133624394735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,1024,0.016838399569193523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,768,0.0165173331896464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,512,0.01592853367328644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,256,0.015554133057594299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,128,0.015171200037002563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,64,0.015130666891733804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,32,0.015421866377194723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3584,65536,0.1369152069091797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,16384,0.044523731867472334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,10240,0.03320639928181966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,8192,0.029305599133173627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,7168,0.026984532674153645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,12288,0.03761386473973592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,6144,0.02542933424313863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,4096,0.022182399034500123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,5120,0.024401066700617473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,3584,0.021643733978271483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,1024,0.016594133774439492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,2560,0.019321600596110024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,2048,0.01845653255780538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,3072,0.020718934138615926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,1536,0.01704746683438619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,768,0.01623040040334066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,512,0.015998933712641397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,256,0.015561599532763162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,64,0.015108266472816467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,128,0.015132799744606018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,32,0.01548693378766378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,10240,0.02981226642926534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,3072,65536,0.12375893592834472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,12288,0.03406399885813395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,8192,0.027396267652511595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,16384,0.04633280038833618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,5120,0.022111999988555908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,7168,0.02555840015411377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,6144,0.02388906677563985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,4096,0.0209824005762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,3584,0.019966934124628702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,2048,0.01762239933013916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,3072,0.018863999843597413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,2560,0.018092799186706542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,1024,0.016353066762288412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,1536,0.01675093372662862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,256,0.015586133797963461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,512,0.015783466895421348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,768,0.016104533274968465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,64,0.015086932977040609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,128,0.015144532918930054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,32,0.01534933348496755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2560,65536,0.11396160125732421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,8192,0.02568639914194743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,12288,0.03136533300081889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,10240,0.028258132934570312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,16384,0.0450709342956543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,7168,0.024173865715662636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,6144,0.022950400908788048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,5120,0.021288534005482994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,4096,0.02030293345451355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,2560,0.018242132663726807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,3584,0.01908373236656189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,2048,0.01745599905649821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,3072,0.019004799922307334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,1536,0.016927999258041383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,512,0.015735466281572977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,768,0.015702399611473083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,1024,0.015988266468048094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,128,0.014779733618100485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,64,0.015026133259137472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,256,0.015184000134468079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,65536,0.10733653704325359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,2048,32,0.015241600076357522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,16384,0.037752532958984376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,12288,0.03200640082359314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,8192,0.027612799406051637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,4096,0.019851734240849815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,5120,0.021002666155497233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,7168,0.023809067408243813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,6144,0.022407466173172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,10240,0.029402667284011842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,2560,0.017697066068649292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,3584,0.018258132537206016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,3072,0.018794665733973183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,2048,0.017356799046198527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,1536,0.016567466656366985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,1024,0.01586133340994517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,768,0.015735466281572977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,512,0.015385599931081137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,65536,0.09536106586456299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,256,0.015029333035151162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,64,0.014846932888031007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,128,0.014975999792416891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1536,32,0.014867200454076131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,12288,0.02887679934501648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,16384,0.033212800820668534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,10240,0.026980266968409224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,8192,0.02503040035565694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,6144,0.021939200162887574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,7168,0.02329279979070028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,4096,0.019052799542744955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,5120,0.019960532585779824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,3072,0.018202666441599527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,3584,0.01877546707789103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,512,0.015233066678047181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,2560,0.017512534062067667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,1024,0.015833600362141927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,65536,0.08768853346506754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,2048,0.017128533124923705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,768,0.015692800283432007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,1536,0.0165802667538325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,256,0.014924800395965577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,128,0.01480959951877594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,32,0.014660267035166421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,1024,64,0.014759467045466105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,10240,0.025948800643285114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,16384,0.030801065762837726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,12288,0.027752532561620073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,7168,0.023221333821614586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,8192,0.023222400744756063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,6144,0.021282132466634116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,5120,0.01999680002530416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,2560,0.017768534024556477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,1536,0.016353066762288412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,4096,0.019143466154734293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,3584,0.01865066687266032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,3072,0.018181333939234413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,2048,0.016898133357365928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,65536,0.07808852990468343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,1024,0.01564586659272512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,768,0.01550933321317037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,512,0.015078399578730264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,256,0.015004799763361613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,128,0.014723199605941772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,64,0.01456000010172526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,768,32,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,10240,0.02514773408571879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,12288,0.02582826614379883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,16384,0.02874880035718282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,8192,0.02353386680285136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,7168,0.022824533780415854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,6144,0.02157013416290283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,5120,0.019747199614842732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,4096,0.018823466698328652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,3584,0.01853013237317403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,65536,0.07319786548614501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,3072,0.017720532417297364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,1536,0.01653333306312561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,512,0.015186132987340293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,2560,0.017297067244847617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,2048,0.016858667135238647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,768,0.015545599659283958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,1024,0.015629866719245912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,128,0.0144405335187912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,256,0.014773333072662353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,32,0.014677332838376364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,512,64,0.014528000354766845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,12288,0.02432639996210734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,10240,0.023806933561960855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,16384,0.028009599447250365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,6144,0.02136533260345459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,8192,0.022626133759816487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,7168,0.02181866765022278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,5120,0.019553067286809285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,4096,0.018833067019780478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,65536,0.0678218682607015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,2560,0.01732800006866455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,3584,0.01829973260561625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,3072,0.01792959968249003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,2048,0.016747732957204185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,256,0.014782933394114175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,1536,0.01602133313814799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,1024,0.01576533317565918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,768,0.015491200486818948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,512,0.015194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,128,0.014538666605949402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,64,0.014638933539390563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,256,32,0.014646400014559427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,10240,0.023050665855407715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,16384,0.026294400294621784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,12288,0.024293333292007446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,8192,0.02212693293889364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,7168,0.021857066949208578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,6144,0.021207465728123983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,4096,0.018653867642084758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,5120,0.019668267170588175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,65536,0.06297706762949626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,3584,0.018318933248519898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,1024,0.015625600020090738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,3072,0.01759679913520813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,2560,0.017078399658203125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,512,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,1536,0.016127999623616537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,2048,0.016742400328318276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,768,0.01508799990018209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,128,0.014447999993960061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,256,0.014907733599344889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,64,0.014514133334159851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,128,32,0.01453439990679423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,8192,0.021782400210698445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,12288,0.02410879929860433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,10240,0.023540266354878745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,16384,0.02619733413060506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,7168,0.02172373334566752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,5120,0.019489065806070963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,65536,0.05983359813690185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,6144,0.02099519968032837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,4096,0.018553600708643595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,3584,0.01808533271153768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,3072,0.017681066195170084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,2560,0.017051732540130614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,512,0.015065600474675497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,1024,0.015250133474667868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,2048,0.0164490669965744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,1536,0.01612053314844767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,768,0.015473066767056783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,256,0.014869333306948344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,32,0.01441493332386017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,128,0.01442026694615682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,64,64,0.014512000481287637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,7168,0.02159893314043681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,16384,0.025965867439905803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,12288,0.023718400796254476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,10240,0.0232149342695872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,8192,0.021679999430974324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,6144,0.020920532941818237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,5120,0.019394133488337198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,65536,0.057181866963704434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,4096,0.01857173244158427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,2560,0.016953599452972413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,3072,0.017520000537236534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,3584,0.018004266421000163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,1536,0.01618880033493042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,2048,0.016551466782887776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,768,0.015286399920781454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,512,0.015011200308799743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,1024,0.015477333466211954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,256,0.014910933375358582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,128,0.014548266927401224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,64,0.014424533645311991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,192,32,32,0.014454399545987448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,4096,0.09605120023091634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,5120,0.11432747046152751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,6144,0.13022720019022624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,7168,0.1467413266499837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,8192,0.16549866994222004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,10240,0.20056427319844566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,3584,0.08726507027943929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,3072,0.0775317351023356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,2560,0.06824639638264975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,1024,0.04086720148722331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,2048,0.05833386580149332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,1536,0.0487008015314738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,12288,0.2365664005279541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,256,0.02481600046157837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,512,0.029557333389918013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,128,0.02225173314412435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,768,0.03502506812413533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,32,0.022073600689570108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,64,0.0219541331132253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,10240,0.06595733165740966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,6144,0.04579519828160604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,7168,0.050405331452687586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,8192,0.05596266587575277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,12288,0.07620159784952799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,16384,0.09722452958424886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,65536,16384,0.3037439982096354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,5120,0.04149760007858276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,3072,0.031939200560251874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,4096,0.036073601245880126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,3584,0.034306132793426515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,1024,0.020894932746887206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,2560,0.02900693416595459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,1536,0.02411306699117025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,768,0.020244266589482626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,512,0.01889066696166992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,256,0.017214934031168617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,2048,0.02605760097503662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,128,0.016350932916005454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,64,0.01649173299471537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,32,0.01690453290939331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,8192,0.048290133476257324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,7168,0.044437332948048906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,10240,0.05701653162638346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,6144,0.04051946798960368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,12288,0.06513173182805379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,5120,0.03556480010350545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,16384,0.08230400085449219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,2560,0.02502826650937398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,2048,0.023128533363342287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,3072,0.02712106704711914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,3584,0.02956586678822835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,4096,0.03146026730537414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,768,0.018979199727376304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,1536,0.021091200908025107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,1024,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,64,0.01630400021870931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,512,0.0169322669506073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,256,0.016411733627319337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,128,0.015773866573969522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,32,0.016331733266512553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,8192,0.048946134249369305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,7168,0.04409279823303223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,10240,0.056677333513895665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,16384,0.08712746302286783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,12288,0.06442879835764567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,16384,65536,0.3457365353902181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,3072,0.02560746669769287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,2560,0.02486613392829895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,6144,0.038702932993570964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,3584,0.02750399907430013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,4096,0.029710932572682695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,5120,0.033207466204961136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,1536,0.021130667130152384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,768,0.01843093236287435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,1024,0.01927786668141683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,2048,0.02232746680577596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,512,0.016871466239293417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,256,0.016166399916013083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,128,0.015962666273117064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,64,0.016004266341527303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,32,0.015923200050989787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,12288,65536,0.2686314582824707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,8192,0.03804479837417603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,10240,0.043010131518046064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,7168,0.034086398283640545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,12288,0.047806934515635176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,16384,0.05990826686223348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,6144,0.033318400382995605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,3584,0.024055467049280802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,4096,0.02582506736119588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,5120,0.029191466172536214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,3072,0.02291626731554667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,2560,0.021833600600560506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,2048,0.020584533611933388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,1024,0.017509333292643228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,1536,0.019131733973821004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,768,0.01690559983253479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,64,0.015773866573969522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,512,0.016370133558909098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,10240,65536,0.24141546885172524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,128,0.015373866756757101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,256,0.015763200322786965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,32,0.015617066621780395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,7168,0.03322986761728923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,6144,0.030373332897822063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,8192,0.03566720088322957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,16384,0.05641813278198242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,10240,0.04256639877955119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,12288,0.04566826820373535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,3072,0.022382932901382446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,4096,0.02444159984588623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,3584,0.023778132597605386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,2560,0.02169706622759501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,2048,0.020119466384251914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,5120,0.027666133642196656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,1536,0.019341866175333657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,8192,65536,0.20022080739339193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,64,0.015513599912325541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,1024,0.017065600554148356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,768,0.016310399770736693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,512,0.0164682666460673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,128,0.015364266435305276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,256,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,32,0.015198933084805808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,8192,0.034436265627543136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,7168,0.031724800666173295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,10240,0.03962560097376506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,6144,0.029099732637405396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,12288,0.043800532817840576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,7168,65536,0.1872938632965088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,16384,0.0534880002339681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,4096,0.024053333202997844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,5120,0.02654613256454468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,2048,0.020435200134913126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,3072,0.022106667359670006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,3584,0.023286400238672893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,2560,0.02109973430633545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,1536,0.01880426605542501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,512,0.016474666198094685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,1024,0.01722666621208191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,768,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,256,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,128,0.015402666727701821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,64,0.015243732929229736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,32,0.015215999881426492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,6144,0.02879040042559306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,8192,0.03535786469777425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,7168,0.032321067651112874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,10240,0.03999146620432536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,12288,0.04562986691792806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,6144,65536,0.16611199378967284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,16384,0.05576320091883341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,5120,0.025499733289082845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,2048,0.02008533279101054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,4096,0.023792000611623128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,3584,0.022734934091567995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,2560,0.021211733420689903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,3072,0.021915733814239502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,1536,0.01835413376490275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,768,0.017013333241144814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,1024,0.01699733336766561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,512,0.016404267152150473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,256,0.015803733468055726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,128,0.015370666980743408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,64,0.015305599570274353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,32,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,5120,65536,0.14645439783732098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,7168,0.0310261329015096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,6144,0.028292266527811687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,10240,0.03794346650441487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,8192,0.034790400664011636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,16384,0.04876586596171061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,12288,0.04171839952468872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,5120,0.023947733640670776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,4096,0.021580799420674642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,3584,0.021036799748738608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,1024,0.016451199849446617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,3072,0.020362667242685952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,2560,0.01930346687634786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,2048,0.01824000080426534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,1536,0.0172650674978892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,256,0.015387733777364096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,768,0.015972266594568886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,512,0.015732266505559287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,32,0.015171200037002563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,64,0.015334399541219077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,128,0.01514240006605784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,4096,65536,0.12991360028584797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,8192,0.02685333291689555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,6144,0.023864533503850302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,7168,0.025140267610549927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,5120,0.022552533944447836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,16384,0.046288001537323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,10240,0.03595093488693237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,12288,0.039926401774088544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,2560,0.019117865959803262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,2048,0.017612799008687337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,4096,0.02095680038134257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,3584,0.02026453415552775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,3072,0.019735467433929444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,1536,0.016764799753824867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,1024,0.016141866644223533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,768,0.016145066420237223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,512,0.015574399630228677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,64,0.015035733580589294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,256,0.01502293348312378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,128,0.01507306694984436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,32,0.014995200435320535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3584,65536,0.12111252943674725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,12288,0.03816213210423787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,16384,0.04506560166676839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,10240,0.03446186780929565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,8192,0.028894933064778645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,7168,0.027607466777165728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,6144,0.025212800502777098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,5120,0.022155733903249104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,4096,0.02066133419672648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,3584,0.02044693430264791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,3072,0.019486933946609497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,2560,0.018850133816401164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,2048,0.01730560064315796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,1536,0.01695573329925537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,1024,0.01622719963391622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,512,0.01575573285420736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,768,0.01584106683731079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,256,0.015220266580581666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,128,0.015015467007954916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,64,0.014780799547831217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,32,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,3072,65536,0.112008531888326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,12288,0.03444693485895793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,16384,0.040651734670003256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,10240,0.03150933384895325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,8192,0.027455999453862505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,7168,0.026107732454935712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,4096,0.02077440023422241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,6144,0.02409706711769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,5120,0.021893332401911415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,3584,0.019845332702000937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,1536,0.016722132762273155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,3072,0.018425599733988444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,2560,0.018525866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,2048,0.01681493322054545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,1024,0.016139733791351318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,768,0.015748266379038492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,256,0.015032533804575601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,512,0.01564479966958364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,65536,0.10341760317484539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,128,0.014939733346303306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,64,0.014808533589045205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2560,32,0.015042133132616677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,12288,0.03129600087801616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,10240,0.02804906765619914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,16384,0.037109335263570145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,7168,0.024062933524449666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,8192,0.025387734174728394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,6144,0.022317866484324135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,5120,0.021146667003631592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,3584,0.018956800301869713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,4096,0.019826134045918785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,3072,0.01874133348464966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,768,0.015939199924468996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,2560,0.017321600516637167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,2048,0.017002665996551515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,1536,0.01626240015029907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,1024,0.015801599621772765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,512,0.015389866630236306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,65536,0.09237439632415771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,128,0.014891733725865683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,256,0.014934399724006652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,64,0.015068800250689188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,2048,32,0.015282133221626281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,12288,0.030898133913675945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,16384,0.03344213167826335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,8192,0.025627734263737996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,10240,0.028308266401290895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,7168,0.024069333076477052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,5120,0.020870399475097657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,6144,0.02267199953397115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,3072,0.01842986742655436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,4096,0.019045333067576088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,3584,0.018380800882975258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,2560,0.01733120083808899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,768,0.01585706671079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,65536,0.08364799817403158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,2048,0.01684479912122091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,512,0.015321600437164306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,1536,0.01646506687005361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,1024,0.015920000274976094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,256,0.014958932995796204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,64,0.014899200201034546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,128,0.014702933033307395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1536,32,0.014706133802731832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,7168,0.022657066583633423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,16384,0.03198506633440654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,10240,0.025963733593622845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,12288,0.028356266021728516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,8192,0.024145066738128662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,6144,0.021387734015782676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,4096,0.020165334145228066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,5120,0.019613866011301676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,3584,0.018347734212875368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,3072,0.018026665846506754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,2560,0.017617066701253258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,2048,0.017211733261744182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,1536,0.016294399897257485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,1024,0.015878400206565856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,65536,0.07263573010762533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,512,0.015286399920781454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,768,0.015331199765205384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,256,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,128,0.014812800288200378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,64,0.014514133334159851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,1024,32,0.014691199858983359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,12288,0.026954666773478193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,16384,0.03146026730537414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,10240,0.025183999538421632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,8192,0.02390186587969462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,4096,0.01880106727282206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,6144,0.02140373388926188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,7168,0.022651733954747517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,5120,0.019709867238998414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,65536,0.06893226305643717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,2048,0.016857600212097167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,3072,0.01760639945665995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,3584,0.018237866957982383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,2560,0.017395200332005818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,1536,0.01621333360671997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,1024,0.015824000040690102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,256,0.014730667074521383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,768,0.015293866395950317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,512,0.01536853313446045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,128,0.014615466197331747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,32,0.014784000317255654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,768,64,0.014647466937700906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,8192,0.02201706568400065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,12288,0.02474453250567118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,16384,0.02802986701329549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,10240,0.023502933979034423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,7168,0.02174293398857117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,6144,0.020916267236073812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,5120,0.019396267334620156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,4096,0.018691200017929076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,3584,0.018231467405954997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,65536,0.06406186819076538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,3072,0.01763520042101542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,2560,0.0172650674978892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,2048,0.016561067104339598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,1536,0.016037333011627197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,768,0.015464533368746439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,1024,0.015574399630228677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,512,0.015093333522478738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,256,0.014802133043607077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,128,0.014659200112024942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,64,0.014539733529090881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,512,32,0.014434132973353067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,10240,0.022744532426198324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,16384,0.025354667504628496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,12288,0.02361066738764445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,5120,0.019106133778889974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,4096,0.018499199549357095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,8192,0.02170133392016093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,7168,0.021652267376581828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,65536,0.0599616010983785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,6144,0.020530132452646892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,3584,0.018103466431299845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,3072,0.017340799172719322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,2048,0.01662826637427012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,256,0.014862933754920959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,1024,0.015620266397794088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,2560,0.017043199141820273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,1536,0.016037333011627197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,768,0.015266133348147073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,512,0.015146666765213012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,128,0.014467199643452963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,32,0.014536533753077188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,256,64,0.014408533771832785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,10240,0.022674133380254112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,16384,0.025091199080149333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,12288,0.02290453314781189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,8192,0.02160853346188863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,6144,0.020784000555674233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,5120,0.019352533419926963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,7168,0.021191465854644775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,65536,0.05400853157043457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,2048,0.01663040022055308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,4096,0.018525866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,1536,0.015913599729537965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,3584,0.01753386656443278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,3072,0.017498666048049928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,2560,0.01703146696090698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,1024,0.015438933173815408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,768,0.01516480048497518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,512,0.015084800124168397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,128,0.014428800344467163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,32,0.014596266547838846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,256,0.014668800433476768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,128,64,0.01439786652723948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,16384,0.024740266799926757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,10240,0.02264960010846456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,8192,0.021101866165796915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,12288,0.022874667247136434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,65536,0.05034346580505371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,7168,0.021474132935206093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,6144,0.02063039938608805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,5120,0.01899626652399699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,4096,0.018380800882975258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,3072,0.017400532960891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,3584,0.018074667453765868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,2560,0.016875733931859337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,2048,0.016523733735084534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,1024,0.015457066893577575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,1536,0.016102400422096253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,768,0.015076266725858054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,256,0.014502400159835815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,128,0.014521599809328715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,512,0.014972800016403198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,32,0.014425599575042724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,64,64,0.014524799585342408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,8192,0.021581866343816123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,10240,0.021997867027918498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,16384,0.024954666694005333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,12288,0.02241920034090678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,7168,0.02136639952659607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,6144,0.020141865809758505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,5120,0.019378133614857993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,65536,0.04822719891866048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,4096,0.01847040057182312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,3584,0.017831466595331826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,3072,0.017318399747212727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,2048,0.01646613379319509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,2560,0.016756266355514526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,256,0.014597333470980325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,512,0.015251200397809347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,1536,0.016005333264668783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,1024,0.015579733252525329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,768,0.015205333630243937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,128,0.014383999506632486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,64,0.014432000120480857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,160,32,32,0.014419200023015341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,4096,0.09148266315460205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,5120,0.10756479899088542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,6144,0.12477013270060222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,7168,0.14004480044047038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,8192,0.1587125301361084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,3584,0.08292160034179688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,2048,0.05586666663487753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,3072,0.07359466552734376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,10240,0.19259626070658367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,2560,0.0646016001701355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,1536,0.04634773333867391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,12288,0.22675093015034994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,1024,0.03774293263753255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,128,0.020994132757186888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,256,0.02345493237177531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,64,0.020773333311080933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,512,0.027845333019892376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,768,0.03318399985631307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,32,0.019777067502339683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,6144,0.044499198595682785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,65536,16384,0.29350401560465494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,7168,0.04841279983520508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,8192,0.05429759820302328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,10240,0.06224106550216675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,12288,0.07184747060139975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,4096,0.037089065710703535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,5120,0.03992213408152263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,3584,0.03322346607844035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,3072,0.030712532997131347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,16384,0.09162879784901937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,1024,0.020451200008392335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,2560,0.02813653349876404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,1536,0.02272319992383321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,2048,0.025033599138259886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,128,0.016107733050982155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,768,0.019410133361816406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,256,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,512,0.01764586567878723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,32,0.016609066724777223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,64,0.01657919983069102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,7168,0.040907732645670575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,6144,0.03768320083618164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,8192,0.045304532845815024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,10240,0.05197333494822184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,12288,0.05889600118001302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,16384,0.07417919635772705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,5120,0.034116268157958984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,2560,0.02430079976717631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,4096,0.030517333745956422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,3584,0.028585600852966308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,1536,0.020777599016825358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,2048,0.022476800282796226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,3072,0.026336000363032026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,1024,0.019223467508951823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,256,0.015941333770751954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,768,0.018323200941085815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,128,0.015677866339683533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,512,0.016501333316167197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,64,0.015714133779207863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,32,0.01602026621500651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,12288,0.052894934018452966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,8192,0.04104959964752197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,7168,0.037623465061187744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,10240,0.046366933981577554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,16384,0.06668480237325033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,6144,0.03463146686553955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,3072,0.024145066738128662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,4096,0.02786986629168193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,5120,0.03102933367093404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,3584,0.02590293288230896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,16384,65536,0.32696320215861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,2560,0.022699733575185142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,256,0.01576959987481435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,2048,0.021204266945521036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,1024,0.018465065956115724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,768,0.016668800512949625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,1536,0.0197269340356191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,512,0.015547733505566916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,128,0.015668267011642457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,64,0.015736533204714458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,32,0.015508266290028891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,12288,65536,0.2573344071706136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,10240,0.041187198956807454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,7168,0.03295893271764119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,8192,0.036558934052785236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,16384,0.05728533267974854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,12288,0.045873065789540604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,6144,0.032257066170374556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,5120,0.028148265679677327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,3584,0.02368639906247457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,4096,0.024623999993006386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,1536,0.018934400876363118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,3072,0.022587732474009196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,2560,0.021258666117986044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,2048,0.020163200298945107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,10240,65536,0.22777600288391114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,1024,0.017683200041453042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,512,0.01626240015029907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,768,0.01595626672108968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,256,0.01560426652431488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,128,0.015083733201026916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,32,0.01508799990018209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,64,0.01560426652431488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,7168,0.03184213240941365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,6144,0.029470932483673096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,8192,0.03480213483174642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,16384,0.06198399861653646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,10240,0.03910719950993856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,12288,0.04417706727981567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,3072,0.021585067113240562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,5120,0.02611946662267049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,4096,0.023757867018381753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,3584,0.022482132911682128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,2560,0.020414932568868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,2048,0.01957226594289144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,1536,0.018191999197006224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,8192,65536,0.19368106524149578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,1024,0.016657066345214844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,768,0.016315733393033348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,512,0.015572266777356467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,256,0.015525333086649575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,64,0.01541973352432251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,128,0.01495253344376882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,32,0.015458133816719056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,8192,0.032434133688608806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,7168,0.029064534107844035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,10240,0.035368533929189046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,6144,0.02687573234240214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,12288,0.03937919934590657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,5120,0.024566400051116943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,16384,0.048205868403116865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,2048,0.01909653345743815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,4096,0.022446932395299275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,2560,0.019744000832239785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,3584,0.021716266870498657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,3072,0.020756266514460244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,7168,65536,0.17445546785990398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,1024,0.015927466750144958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,1536,0.017911465962727864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,512,0.015590399503707886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,768,0.0160480002562205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,256,0.015352533260981242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,128,0.014872533082962037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,32,0.015396266182263692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,64,0.015471999843915304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,6144,0.02536960045496623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,7168,0.027446399132410686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,10240,0.03328319986661275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,8192,0.029997867345809937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,12288,0.037588266531626384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,4096,0.02162453333536784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,16384,0.05105813344319662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,6144,65536,0.15546026229858398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,5120,0.02297280033429464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,3584,0.02095359961191813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,3072,0.020114133755366005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,2048,0.01837973395983378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,2560,0.019412267208099365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,1024,0.016221867005030314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,1536,0.016174933314323424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,768,0.01617280046145121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,128,0.015094400445620219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,512,0.015677866339683533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,256,0.015304533640543619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,32,0.015004799763361613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,64,0.01502826710542043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,7168,0.025854933261871337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,6144,0.024090667565663658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,10240,0.030988800525665283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,12288,0.03511679967244466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,8192,0.028486400842666626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,16384,0.04527466694513957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,5120,0.02241599957148234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,5120,65536,0.13768107096354168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,4096,0.020840533574422202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,3072,0.019708800315856933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,1536,0.016852267583211265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,3584,0.020658133427302043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,2560,0.018947199980417887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,2048,0.018387200435002644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,1024,0.01627306640148163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,512,0.01574613352616628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,256,0.015294933319091797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,768,0.015794133146603904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,128,0.015020799636840821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,64,0.015201066931088766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,32,0.015317333738009134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,12288,0.03220906654993693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,4096,65536,0.11909546852111816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,8192,0.025563732782999678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,10240,0.028843732674916585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,6144,0.02297919988632202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,7168,0.023934932549794515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,16384,0.04371840159098307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,5120,0.022030933698018392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,4096,0.020467199881871543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,3072,0.01955519914627075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,2048,0.017292799552281697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,3584,0.020170666774113975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,1536,0.016758400201797485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,2560,0.018513067563374837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,1024,0.016284799575805663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,64,0.01493333379427592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,128,0.014846932888031007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,768,0.01569919983545939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,512,0.015552000204722086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,256,0.01523413360118866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,32,0.015064533551534018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3584,65536,0.11178560256958008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,8192,0.025075199206670125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,7168,0.023542400201161703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,6144,0.02257386644681295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,10240,0.027461334069569902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,16384,0.04059413274129232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,12288,0.031172267595926922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,2048,0.01745599905649821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,5120,0.021082667509714763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,4096,0.020023467143376668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,3072,0.019080533583958944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,3584,0.019610667228698732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,2560,0.018106667200724284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,1536,0.016603733102480568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,1024,0.015901866555213928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,768,0.01600106656551361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,65536,0.10334400335947673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,512,0.015441067020098367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,64,0.014877866705258688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,256,0.015266133348147073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,128,0.014713600277900696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,3072,32,0.015170133113861084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,12288,0.03253119985262553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,7168,0.02318399945894877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,10240,0.026820266246795656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,8192,0.024451200167338052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,6144,0.02215893268585205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,5120,0.0207370658715566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,4096,0.0198634664217631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,16384,0.0380021333694458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,1024,0.015879467129707336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,3072,0.017986132701237997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,3584,0.019219199816385903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,2560,0.017553067207336424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,1536,0.016684800386428833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,2048,0.016898133357365928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,512,0.01520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,768,0.016030933459599814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,65536,0.09457706610361735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,256,0.015009066462516785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,32,0.015024000406265258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,64,0.014665599664052328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2560,128,0.014840533336003622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,10240,0.027880533536275225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,12288,0.029370667537053426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,16384,0.03434133529663086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,7168,0.02250666618347168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,8192,0.026391466458638508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,6144,0.021623466412226358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,5120,0.02067626714706421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,4096,0.019795199235280357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,3584,0.018729599316914876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,3072,0.018283732732137046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,2560,0.017385600010553996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,768,0.015737600127855935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,2048,0.016777600844701132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,1536,0.016312533617019655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,1024,0.01607360045115153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,65536,0.08257280190785726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,64,0.014729600151379904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,256,0.015034666657447815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,512,0.01508799990018209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,128,0.014725333452224732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,2048,32,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,10240,0.02600853244463603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,16384,0.030554666121800737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,12288,0.027548799912134807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,8192,0.023316266139348348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,6144,0.021555199225743612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,7168,0.02213546633720398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,5120,0.019722666343053183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,4096,0.01923946738243103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,3584,0.018708266814549766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,768,0.015707733233769734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,1024,0.015573333700497946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,2560,0.017245866854985557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,3072,0.017570134003957114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,2048,0.016782933473587038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,1536,0.016170666615168253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,65536,0.07539306481679281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,256,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,512,0.015309866269429526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,128,0.014648532867431641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,64,0.014745600024859109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1536,32,0.014806399742762247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,10240,0.024114133914311726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,16384,0.02779200077056885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,12288,0.024743467569351196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,6144,0.020754132668177286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,8192,0.022035199403762817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,7168,0.02193386753400167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,4096,0.018424532810846963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,5120,0.019655466079711914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,3584,0.01842026710510254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,3072,0.017746132612228394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,2560,0.01716053287188212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,65536,0.06563626527786255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,1024,0.01576746702194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,2048,0.01663040022055308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,1536,0.016075733304023742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,768,0.015608533223470052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,512,0.015026133259137472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,128,0.014547200004259745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,32,0.014575999975204468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,256,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,1024,64,0.014851199587186179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,16384,0.026770132780075073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,7168,0.021669334173202513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,12288,0.024124799172083537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,8192,0.02209279934565226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,10240,0.024081067244211832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,6144,0.020808533827463786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,5120,0.019346133867899577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,3584,0.018075732390085857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,4096,0.01844266653060913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,2560,0.017080533504486083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,65536,0.06099520126978556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,3072,0.017478400468826295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,1536,0.01589120030403137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,2048,0.016518400112787882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,1024,0.015655466914176942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,768,0.015389866630236306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,256,0.014905599753061929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,512,0.015196800231933594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,128,0.014632532993952433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,64,0.014651733636856078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,768,32,0.014437333742777506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,16384,0.02579200069109599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,12288,0.023125332593917847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,10240,0.022154666980107627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,8192,0.02087679902712504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,7168,0.021332265933354695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,6144,0.020629332462946574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,3072,0.017653334140777587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,5120,0.01920426686604818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,3584,0.018029866615931193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,4096,0.018641066551208497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,65536,0.05670613447825114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,2560,0.016992000738779704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,1024,0.015598932902018229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,2048,0.01653439998626709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,1536,0.01604159971078237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,512,0.015269333124160766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,768,0.015307733416557312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,256,0.014944000045458474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,128,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,64,0.014620799819628397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,512,32,0.01458560029665629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,8192,0.020702934265136717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,12288,0.022087466716766358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,10240,0.021498666206995646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,5120,0.019059199094772338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,16384,0.023913600047429404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,7168,0.020909865697224937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,6144,0.020191999276479085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,65536,0.048786131540934245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,2560,0.016888533035914102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,4096,0.01843413313229879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,3584,0.017541333039601644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,3072,0.017127466201782227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,2048,0.016338133811950685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,1024,0.015379200379053751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,1536,0.01593706707159678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,768,0.015286399920781454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,512,0.014924800395965577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,256,0.014773333072662353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,128,0.01479039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,64,0.014477866888046264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,256,32,0.014471466342608133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,10240,0.021613866090774536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,12288,0.022009599208831786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,7168,0.02081706722577413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,16384,0.02373440066973368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,8192,0.020603734254837035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,65536,0.04451093276341756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,6144,0.020266666014989217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,5120,0.018915200233459474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,3072,0.01717653274536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,4096,0.018363734086354576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,3584,0.018003199497858682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,2048,0.016445866227149962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,2560,0.01683733264605204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,512,0.015285332997639975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,256,0.014994133512179056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,1536,0.01567146678765615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,768,0.015429332852363586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,1024,0.015659733613332113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,64,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,32,0.014436266819636025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,128,128,0.014591999848683677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,16384,0.02341653307278951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,12288,0.022041600942611695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,10240,0.021666133403778078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,8192,0.02029866576194763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,6144,0.020116267601648967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,7168,0.020716800292332967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,4096,0.018222934007644652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,65536,0.04194986820220947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,5120,0.018953599532445273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,3584,0.017779199282328288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,3072,0.017352533340454102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,1024,0.01560640037059784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,2048,0.016210132837295534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,2560,0.016549332936604818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,768,0.015175466736157736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,1536,0.016085333625475564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,512,0.015005866686503092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,64,0.014498133460680643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,32,0.014382933576901754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,256,0.014702933033307395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,64,128,0.014422399799029031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,16384,0.023484800259272257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,12288,0.021766400337219237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,8192,0.02062293291091919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,10240,0.02146773338317871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,65536,0.042019200325012204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,7168,0.02039466698964437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,6144,0.019925334056218467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,5120,0.019040000438690186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,4096,0.018126932779947917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,3072,0.017192532618840538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,3584,0.01766506632169088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,2560,0.01676693360010783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,256,0.014666666587193808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,2048,0.01605013310909271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,1536,0.01583146651585897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,1024,0.015383467078208923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,512,0.015107199549674988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,768,0.015009066462516785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,128,0.014526933431625366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,32,0.014318933089574179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,128,32,64,0.014568533500035605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,4096,0.08690666357676188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,5120,0.10322240193684895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,6144,0.11976532936096192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,7168,0.13509653409322103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,8192,0.15237332979838053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,3072,0.07023786703745524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,3584,0.07841066519419351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,1536,0.0441269318262736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,10240,0.1862122694651286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,2560,0.06152960062026978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,1024,0.03599679867426554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,2048,0.0535103996594747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,768,0.031648000081380204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,128,0.019870932896931967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,64,0.019101866086324058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,256,0.022141865889231362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,512,0.025810132424036663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,32,0.01935466726620992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,12288,0.21833173433939615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,8192,0.05167253414789835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,7168,0.04685333172480265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,10240,0.05938986539840698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,12288,0.06770026683807373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,6144,0.04501973390579224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,65536,16384,0.2839413324991862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,5120,0.038652801513671876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,16384,0.08590826988220215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,3072,0.029662932952245074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,4096,0.034017066160837814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,2560,0.027062400182088213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,3584,0.0319925328095754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,1536,0.02216106653213501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,2048,0.025219200054804485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,1024,0.020001065731048585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,768,0.018949333826700845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,64,0.016196266810099284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,512,0.017449599504470826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,256,0.016012799739837647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,128,0.015800533692042033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,32,0.01586560010910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,7168,0.039348268508911134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,6144,0.036278398831685384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,8192,0.04299626747767131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,12288,0.05628266731897989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,10240,0.04964480002721151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,16384,0.07003200054168701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,3072,0.02579840024312337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,2048,0.02209386626879374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,2560,0.024012800057729086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,5120,0.03314773241678874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,3584,0.027723733584086103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,4096,0.029290666182835896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,1024,0.0190720001856486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,1536,0.020497065782546998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,512,0.015998933712641397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,128,0.015237333377202353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,256,0.015796266992886863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,768,0.018068265914916993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,64,0.015752533078193666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,32,0.016008533040682473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,7168,0.036761601765950516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,8192,0.03965226809183757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,10240,0.045609601338704425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,12288,0.050824534893035886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,16384,0.07538560231526693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,6144,0.033847467104593916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,5120,0.03096853295962016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,16384,65536,0.3102837244669596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,4096,0.02765760024388631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,3584,0.02584853370984395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,3072,0.024742400646209715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,2560,0.02292799949645996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,2048,0.02147946755091349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,1536,0.0205567995707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,1024,0.01877973278363546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,256,0.015796266992886863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,128,0.01546346644560496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,768,0.017607466379801432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,512,0.016454399625460307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,64,0.01567893326282501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,32,0.015573333700497946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,12288,65536,0.24119146664937338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,7168,0.03239466746648152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,10240,0.039308798313140866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,8192,0.0351093331972758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,16384,0.054378668467203774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,12288,0.043747198581695554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,6144,0.031438932816187544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,3072,0.021955200036366782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,3584,0.023372799158096313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,2560,0.020750933885574342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,1536,0.01882986625035604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,2048,0.01977919936180115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,5120,0.0272490660349528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,4096,0.02446826696395874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,512,0.015948800245920818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,1024,0.01732053359349569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,10240,65536,0.21561919848124184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,128,0.015065600474675497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,768,0.016193067034085594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,256,0.015470932920773825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,64,0.015254400173823037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,32,0.01544426679611206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,6144,0.027989333868026732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,7168,0.0300053338209788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,8192,0.03295253316561381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,10240,0.03641813198725383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,12288,0.041024001439412434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,16384,0.05008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,5120,0.02564799984296163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,4096,0.023166932662328086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,3584,0.022317866484324135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,8192,65536,0.18022079467773439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,2560,0.02037546634674072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,1536,0.018262400229771932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,3072,0.021309866507848104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,2048,0.01923840045928955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,128,0.014918399850527444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,512,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,1024,0.01581653356552124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,256,0.015282133221626281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,768,0.016315733393033348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,32,0.015161599715550741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,64,0.015244799852371215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,8192,0.030828799804051715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,7168,0.02831573287645976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,6144,0.02626986702283223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,10240,0.034137598673502606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,12288,0.037732267379760744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,16384,0.045739734172821046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,2048,0.018675200144449868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,4096,0.022087466716766358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,5120,0.024104533592859904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,3584,0.021305600802103676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,2560,0.019850667317708334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,3072,0.02073919971783956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,7168,65536,0.16634346644083658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,1536,0.017926400899887084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,1024,0.01619733373324076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,768,0.016182399789492288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,32,0.015260799725850423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,512,0.015745066603024802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,256,0.015204266707102457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,128,0.014932266871134438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,64,0.015237333377202353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,6144,0.024729599555333458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,12288,0.035861333211263016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,7168,0.02672106623649597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,8192,0.028751999139785767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,16384,0.04866986672083537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,10240,0.03216960032780965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,4096,0.021550933519999184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,6144,65536,0.14324906667073567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,5120,0.023156267404556275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,1536,0.0166101336479187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,3072,0.020150399208068846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,3584,0.020829866329828896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,2560,0.019425066312154134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,2048,0.01840106646219889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,1024,0.01612160007158915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,512,0.015684266885121666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,768,0.016058666507403056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,128,0.014869333306948344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,256,0.015218133727709452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,64,0.015134933590888976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,32,0.015098667144775391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,7168,0.026042666037877398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,16384,0.04245226780573527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,6144,0.024197334051132204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,12288,0.03666773239771525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,10240,0.033511467774709064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,8192,0.03073386748631795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,5120,65536,0.1272010644276937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,4096,0.020402133464813232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,5120,0.02145919998486837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,1024,0.016058666507403056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,3584,0.019523199399312338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,2048,0.01656000018119812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,3072,0.01927466591199239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,2560,0.018412800629933675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,1536,0.01644373337427775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,512,0.015413332978884378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,768,0.01578986644744873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,64,0.015317333738009134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,256,0.01493013302485148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,128,0.014740266402562461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,32,0.015215999881426492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,4096,65536,0.10843199888865154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,16384,0.04035946528116862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,7168,0.024651734034220378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,8192,0.02601066629091899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,6144,0.023525333404541014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,5120,0.021181867520014445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,12288,0.03414719899495443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,10240,0.03159679969151814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,4096,0.019777067502339683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,2048,0.016590933005015053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,3584,0.01930239995320638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,2560,0.018114133675893148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,3072,0.018911999464035035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,1536,0.016220800081888833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,1024,0.01609493295351664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,512,0.015460266669591268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,768,0.01578133304913839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,256,0.014912000298500061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,128,0.014698666334152222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,64,0.014820266763369241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,32,0.015033599734306336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3584,65536,0.1042464017868042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,12288,0.0323360006014506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,8192,0.02754453420639038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,10240,0.029526400566101074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,7168,0.022796799739201866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,16384,0.03763733307520549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,6144,0.02172373334566752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,5120,0.020947200059890748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,4096,0.019883733987808228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,2048,0.016987733046213784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,3072,0.018247467279434205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,3584,0.019028266270955406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,2560,0.01734506686528524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,1536,0.01667413314183553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,768,0.01553600033124288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,512,0.015299200018246969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,1024,0.015759999553362526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,256,0.01482133368651072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,128,0.014729600151379904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,64,0.014898133277893067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,65536,0.0944543997446696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,3072,32,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,16384,0.03423466682434082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,12288,0.029710932572682695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,10240,0.02751680016517639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,6144,0.022104533513387044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,8192,0.024471465746561685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,5120,0.020533333222071327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,7168,0.022974934180577597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,4096,0.01952000061670939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,3584,0.01879040002822876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,768,0.015656532843907674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,2560,0.017193599541982015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,3072,0.01842666665712992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,1536,0.016506666938463845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,2048,0.016807466745376587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,1024,0.015982932845751443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,256,0.014865066607793173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,512,0.015212800105412802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,65536,0.08569066524505616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,128,0.014939733346303306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,32,0.014980266491572062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2560,64,0.0147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,12288,0.02731200059254964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,10240,0.025778132677078246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,8192,0.023177599906921385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,16384,0.03176640073458354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,7168,0.02198293407758077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,6144,0.02133973240852356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,5120,0.019987199703852335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,4096,0.019296000401178993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,3072,0.017573332786560057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,2560,0.017131733894348144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,3584,0.018052266041437785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,2048,0.016615466276804606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,1536,0.016235733032226564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,512,0.015105066696802774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,768,0.015718400478363037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,65536,0.07545813719431559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,256,0.014769066373507181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,1024,0.01590933303038279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,128,0.014677332838376364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,64,0.014802133043607077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,2048,32,0.014903466900189719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,16384,0.02829013268152873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,12288,0.025730133056640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,10240,0.02439039945602417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,4096,0.018793600797653198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,7168,0.021875200668970744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,8192,0.02250666618347168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,6144,0.021293866634368896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,5120,0.019658666849136353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,3584,0.018125865856806436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,2560,0.01690559983253479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,3072,0.017614932854970296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,65536,0.06632320086161295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,2048,0.016434133052825928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,256,0.014780799547831217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,512,0.015050666530927024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,1024,0.015863466262817382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,1536,0.015864533185958863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,768,0.015577600399653117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,64,0.014726400375366211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,128,0.014487466216087342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1536,32,0.014601600170135499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,7168,0.021910399198532104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,8192,0.022503467400868733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,12288,0.024977066119511924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,16384,0.027592533826828004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,10240,0.023305600881576537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,6144,0.02174506584803263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,5120,0.01919040083885193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,4096,0.018221867084503175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,65536,0.056917333602905275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,3584,0.017871999740600587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,3072,0.017550933361053466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,2560,0.016862932840983072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,2048,0.016269866625467935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,768,0.015287466843922935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,1536,0.016011733810106912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,1024,0.015603199601173401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,512,0.015201066931088766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,256,0.014814933141072592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,128,0.014681599537531533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,64,0.01456000010172526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,1024,32,0.014567466576894126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,10240,0.022503467400868733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,12288,0.02396799921989441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,16384,0.025514666239420575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,4096,0.01823893388112386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,8192,0.02135253349939982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,6144,0.020555732647577922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,5120,0.018879999717076622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,7168,0.02068693240483602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,65536,0.05386879841486612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,3584,0.017734400431315103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,3072,0.017498666048049928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,2560,0.016768000523249307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,2048,0.016459733247756958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,1536,0.015939199924468996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,1024,0.015541332960128783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,512,0.01498133341471354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,768,0.015643733739852905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,64,0.01453439990679423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,256,0.01455893317858378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,128,0.01455466647942861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,768,32,0.014641066392262777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,8192,0.020759467283884683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,12288,0.021166932582855225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,16384,0.023693867524464927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,10240,0.020757333437601725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,7168,0.020549333095550536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,6144,0.02026453415552775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,4096,0.01807039976119995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,5120,0.018833067019780478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,3584,0.01775040030479431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,65536,0.04814293384552002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,3072,0.01709973414738973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,2560,0.01681386629740397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,1024,0.015508266290028891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,2048,0.0164000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,1536,0.015999999642372132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,512,0.014985600113868713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,768,0.01554026703039805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,256,0.014646400014559427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,128,0.014417066176732381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,32,0.01452906628449758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,512,64,0.014618666966756186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,12288,0.0211136003335317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,16384,0.02251840035120646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,6144,0.020071466763814293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,10240,0.020916267236073812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,7168,0.020428800582885744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,65536,0.04123946825663249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,8192,0.02020906607309977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,5120,0.018706132968266807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,4096,0.017915733655293784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,3072,0.017156267166137697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,3584,0.017574399709701538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,2560,0.01663146714369456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,2048,0.01607253352801005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,1536,0.01590079963207245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,1024,0.015402666727701821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,768,0.015348266561826071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,256,0.014589866995811463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,512,0.014920533696810404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,128,0.014364799857139588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,64,0.014533332983652749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,256,32,0.014593066771825156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,10240,0.0206389327843984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,16384,0.022184532880783082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,8192,0.020039467016855876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,12288,0.021082667509714763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,65536,0.03779733180999756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,7168,0.020257065693537392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,4096,0.01796906590461731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,5120,0.018550399939219156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,6144,0.020035199324289956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,3072,0.01727573275566101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,3584,0.017525333166122436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,2560,0.016694400707880655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,2048,0.01637440025806427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,1024,0.015530666708946228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,1536,0.015762133399645488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,256,0.014758400122324624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,768,0.015020799636840821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,128,0.01439466675122579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,512,0.015174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,64,0.014550399780273438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,12288,0.020863999923070274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,128,32,0.014475733041763306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,16384,0.022030933698018392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,8192,0.019802665710449217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,10240,0.020450133085250854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,7168,0.02059626579284668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,65536,0.03564373254776001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,5120,0.018796799580256145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,6144,0.0198634664217631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,4096,0.017897599935531618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,3584,0.017416532834370932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,1024,0.015449600418408713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,3072,0.01708586613337199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,1536,0.015706666310628257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,2560,0.016356266538302102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,2048,0.01617280046145121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,128,0.0146506667137146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,768,0.015129599968592325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,512,0.01490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,256,0.014510933558146158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,64,0.014326399564743042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,64,32,0.014467199643452963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,12288,0.020866133769353232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,16384,0.02196586728096008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,65536,0.03540266752243042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,10240,0.020515199502309164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,8192,0.019777067502339683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,6144,0.019733333587646486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,5120,0.018557866414388023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,7168,0.020055466890335084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,4096,0.01788053313891093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,3584,0.01762239933013916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,512,0.015215999881426492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,1024,0.01552959978580475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,2048,0.016289066274960837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,3072,0.016845866044362386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,1536,0.01594239970048269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,2560,0.01658986707528432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,768,0.015312000115712484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,128,0.014702933033307395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,256,0.014612266421318054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,64,0.014317867159843446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,96,32,32,0.014171733458836874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,4096,0.09098666508992513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,5120,0.10887359778086345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,6144,0.12820266882578532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,7168,0.14511574109395345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,8192,0.16339200337727863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,3072,0.07349226474761963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,3584,0.08124159971872966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,2048,0.05579200188318888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,2560,0.06397226651509604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,10240,0.19950720469156902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,1024,0.03750826517740886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,512,0.02687679926554362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,1536,0.0463487982749939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,256,0.021733333667119346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,768,0.032814933856328325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,128,0.020473599433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,32,0.01927893360455831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,64,0.01858133276303609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,12288,0.23350613911946616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,7168,0.04610986709594726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,8192,0.05050133466720581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,10240,0.058627200126647946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,6144,0.044633599122365315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,12288,0.06734506289164224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,65536,16384,0.3017535845438639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,16384,0.0839733362197876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,4096,0.033403734366099044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,5120,0.03793066740036011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,2560,0.026804266373316447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,3584,0.031523199876149495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,3072,0.029128533601760865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,1024,0.01975040038426717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,768,0.018681599696477254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,1536,0.022016000747680665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,2048,0.024929066499074302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,512,0.017123200496037803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,64,0.015818666418393454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,32,0.015924266974131265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,128,0.01557866632938385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,256,0.01623679995536804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,8192,0.04347840150197347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,6144,0.03634453217188517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,7168,0.039700265725453696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,10240,0.049806932608286544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,12288,0.054820267359415684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,16384,0.07036693096160888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,5120,0.03367146650950114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,3072,0.026042666037877398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,4096,0.029637332757314044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,3584,0.027885866165161134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,2560,0.023717333873113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,2048,0.02211093306541443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,1024,0.019092265764872232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,1536,0.02086506684621175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,768,0.017968000968297322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,512,0.015897599856058757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,256,0.015698132912317912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,64,0.015577600399653117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,128,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,32,0.015681067109107973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,7168,0.03592640161514282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,10240,0.045201067129770914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,8192,0.0386186679204305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,12288,0.04986879825592041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,16384,0.07354559898376464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,6144,0.032737066348393754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,5120,0.030430932839711506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,4096,0.026923733949661254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,3584,0.025243733326594037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,16384,65536,0.30395625432332357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,3072,0.02395306626955668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,2560,0.02222613294919332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,2048,0.02113920052846273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,1024,0.018441599607467652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,1536,0.0195850670337677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,768,0.01758293310801188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,256,0.01569493313630422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,512,0.015994667013486227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,128,0.015401599804560342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,12288,65536,0.2443936030069987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,64,0.015236266454060874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,32,0.015333333611488342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,10240,0.038574934005737305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,8192,0.035923198858896895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,12288,0.04251946608225505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,6144,0.029105067253112793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,7168,0.0315338671207428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,16384,0.05210560162862142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,5120,0.02717973391215007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,2048,0.019978666305541994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,3584,0.023079466819763184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,3072,0.02164906660715739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,2560,0.020578134059906005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,4096,0.02399359941482544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,1024,0.016794667641321818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,1536,0.01886506676673889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,10240,65536,0.2106346607208252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,768,0.0161461333433787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,512,0.01544426679611206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,256,0.015286399920781454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,128,0.015093333522478738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,64,0.015392000476519266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,32,0.015613866845766702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,6144,0.027773867050806682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,7168,0.029732267061869305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,8192,0.03236053387324016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,10240,0.0357749342918396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,12288,0.03949120044708252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,16384,0.04806613524754842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,4096,0.02302079995473226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,3584,0.022326399882634483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,5120,0.025177599986394246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,3072,0.02110613385836283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,2560,0.01999893387158712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,1536,0.018118399381637573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,8192,65536,0.17412586212158204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,2048,0.01922453244527181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,768,0.015948800245920818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,1024,0.015940266847610473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,128,0.015031466881434122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,512,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,256,0.015098667144775391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,64,0.015173332889874777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,32,0.01520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,12288,0.036880000432332354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,10240,0.03328853249549866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,8192,0.030203733841578168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,6144,0.025642667214075727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,7168,0.027515733242034913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,5120,0.02371946573257446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,16384,0.044172799587249754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,7168,65536,0.1604383945465088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,2048,0.01853333314259847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,4096,0.02179626623789469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,3584,0.021126399437586464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,3072,0.020403200387954713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,2560,0.019192532698313395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,1536,0.017930666605631508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,768,0.015796266992886863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,1024,0.015838932991027833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,512,0.015449600418408713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,256,0.015118933717409768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,128,0.014889599879582724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,64,0.015069866180419922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,32,0.015260799725850423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,6144,0.0241376002629598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,8192,0.027962666749954224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,7168,0.025783467292785644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,12288,0.03427199920018514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,10240,0.031152000029881794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,6144,65536,0.1409066677093506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,16384,0.04701120058695475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,2048,0.018320000171661376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,5120,0.022567466894785563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,4096,0.02130133310953776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,3584,0.020472532510757445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,3072,0.019828265905380248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,2560,0.019056000312169395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,1536,0.017240534226099648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,1024,0.016080000003178916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,768,0.016089600324630738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,512,0.01529813309510549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,256,0.015086932977040609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,128,0.014857600132624308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,64,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,32,0.01516266663869222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,5120,65536,0.12236906687418621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,7168,0.02402879993120829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,6144,0.022936532894770302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,8192,0.025575466950734455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,5120,0.02141439914703369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,12288,0.03507839838663737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,10240,0.03278719981511434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,16384,0.040822398662567136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,2560,0.018318933248519898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,4096,0.01995840072631836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,3584,0.019774933656056724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,3072,0.018873600165049235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,2048,0.01729493339856466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,1536,0.016302933295567833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,1024,0.015867732961972556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,768,0.015491200486818948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,512,0.015187199910481772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,256,0.01504853367805481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,128,0.014620799819628397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,64,0.01511679987112681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,65536,0.10505279699961345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,4096,32,0.01530880033969879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,6144,0.02185386617978414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,16384,0.0380181352297465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,8192,0.02758293350537618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,10240,0.030425600210825604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,7168,0.022849067052205404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,5120,0.020806399981180827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,12288,0.032979200283686325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,4096,0.01997119983037313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,3584,0.01939520041147868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,3072,0.018557866414388023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,2560,0.0171509325504303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,2048,0.01634880006313324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,1536,0.016130133469899496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,768,0.015618133544921874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,1024,0.015530666708946228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,512,0.015140266219774882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,256,0.014900267124176025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,128,0.014872533082962037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,64,0.014793599645296732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,32,0.014839466412862143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3584,65536,0.09717013041178385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,12288,0.030528000990549725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,16384,0.03561813433965047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,8192,0.026587732632954914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,10240,0.02816213369369507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,6144,0.02422719995180766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,7168,0.025228800376256307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,4096,0.01950826644897461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,5120,0.020523732900619505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,1024,0.015753600001335143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,3584,0.01887680093447367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,3072,0.01853013237317403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,2560,0.016993065675099693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,768,0.015557333827018738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,2048,0.016665599743525186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,1536,0.01620586713155111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,512,0.015103999773661295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,65536,0.08958933353424073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,256,0.014846932888031007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,128,0.014829867084821067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,64,0.01479573349157969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,3072,32,0.014969600240389505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,12288,0.028436267375946046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,10240,0.027005867163340254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,6144,0.021105066935221354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,16384,0.03302826682726542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,5120,0.02030293345451355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,8192,0.024778666098912557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,7168,0.024439465999603272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,4096,0.019448532660802206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,3584,0.019025067488352455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,3072,0.017545600732167564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,2048,0.01696853240331014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,2560,0.017102932929992674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,1536,0.016009599963823954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,512,0.015186132987340293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,1024,0.01586133340994517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,768,0.015495466192563376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,256,0.014788267016410828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,128,0.014739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,65536,0.0819381316502889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,64,0.014696533481280008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,12288,0.02651306589444478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2560,32,0.014801067113876343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,5120,0.020202666521072388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,10240,0.02486293315887451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,16384,0.030343466997146608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,8192,0.023829332987467446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,7168,0.02168853282928467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,6144,0.020974934101104736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,4096,0.018994132677714028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,3584,0.017989333470662436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,3072,0.017102932929992674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,2560,0.01684373418490092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,2048,0.016691199938456216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,512,0.015085867047309876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,1536,0.01599360009034475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,1024,0.015596800049146018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,768,0.01564479966958364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,65536,0.07167572975158691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,256,0.014654933412869772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,32,0.014823466539382935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,128,0.014805333813031516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,2048,64,0.014873600006103516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,10240,0.02399359941482544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,12288,0.024566400051116943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,16384,0.027459200223286944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,7168,0.022205867369969687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,8192,0.022737065951029457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,6144,0.021463465690612794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,5120,0.019336533546447755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,2560,0.016774400075276693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,4096,0.01856106718381246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,2048,0.016235733032226564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,3584,0.017874133586883546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,3072,0.017245866854985557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,768,0.015467733144760132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,65536,0.061963733037312826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,1536,0.016192000110944113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,512,0.015331199765205384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,1024,0.015703466534614564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,256,0.01472106675306956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,32,0.01474346617857615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,64,0.014740266402562461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1536,128,0.014562132954597472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,8192,0.02093120018641154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,12288,0.023111466566721597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,7168,0.021605332692464195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,16384,0.025553067525227863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,10240,0.022553600867589316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,6144,0.020497065782546998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,5120,0.01944640080134074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,3072,0.017127466201782227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,3584,0.017848533391952515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,4096,0.017607466379801432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,65536,0.053274667263031004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,2560,0.016816000143686928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,2048,0.016434133052825928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,1536,0.01590506633122762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,1024,0.015384533007939658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,768,0.015373866756757101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,512,0.015337600310643514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,256,0.014697600404421488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,64,0.014723199605941772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,128,0.014728533228238425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,1024,32,0.01477120021979014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,12288,0.022992000977198283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,10240,0.022272000710169472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,16384,0.024370133876800537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,8192,0.021629865964253744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,7168,0.020778665939966835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,3584,0.017593600352605186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,6144,0.020498132705688475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,5120,0.01914026737213135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,4096,0.01808213392893473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,65536,0.04988053242365519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,3072,0.01716266671816508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,2560,0.016778665781021117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,1536,0.01609813372294108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,2048,0.016335999965667723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,1024,0.01564479966958364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,512,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,768,0.015442132949829102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,256,0.014665599664052328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,128,0.01451520025730133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,32,0.014851199587186179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,768,64,0.014651733636856078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,12288,0.020539732774098714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,6144,0.02000746726989746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,16384,0.02336533268292745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,10240,0.02151573300361633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,7168,0.020013866821924846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,8192,0.020269866784413657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,5120,0.0187008003393809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,65536,0.04424959818522135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,3584,0.017560533682505288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,4096,0.017859200636545815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,2048,0.015964800119400026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,3072,0.017014400164286295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,2560,0.01658453345298767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,1536,0.015867732961972556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,512,0.015033599734306336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,1024,0.015491200486818948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,768,0.015403733650843302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,256,0.014672000209490457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,128,0.014682666460673014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,64,0.014524799585342408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,512,32,0.014696533481280008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,16384,0.022048000494639078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,8192,0.019845332702000937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,10240,0.0209824005762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,12288,0.02029119928677877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,65536,0.037226665019989016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,7168,0.020042665799458823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,4096,0.018117332458496095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,3584,0.017543466885884602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,6144,0.019569067160288493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,5120,0.018703999121983846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,3072,0.017037866512934367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,2560,0.016323199868202208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,1536,0.01573013365268707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,2048,0.016155733664830526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,768,0.015204266707102457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,1024,0.01555519998073578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,512,0.014748799800872802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,256,0.014636799693107605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,128,0.014502400159835815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,32,0.014441600441932679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,256,64,0.014350933829943338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,16384,0.021272534132003786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,12288,0.020406399170557657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,10240,0.020348799228668214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,65536,0.03610026836395264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,7168,0.02029333313306173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,8192,0.019758933782577516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,6144,0.019529600938161217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,3584,0.017548799514770508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,4096,0.017822933197021485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,5120,0.018568533658981323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,3072,0.01688213348388672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,768,0.015330132842063905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,2560,0.016361600160598753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,1024,0.015153066317240397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,512,0.014726400375366211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,2048,0.016215466459592185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,1536,0.01575573285420736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,256,0.014513066411018372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,64,0.014546133081118264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,128,0.01437440017859141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,128,32,0.01455573340257009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,16384,0.021528534094492593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,10240,0.020382932821909585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,12288,0.020077866315841675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,7168,0.020087466637293497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,8192,0.019798400004704793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,65536,0.03309973279635112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,6144,0.019511467218399046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,4096,0.01767359972000122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,2560,0.016516266266504924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,5120,0.018644267320632936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,3584,0.017373865842819212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,3072,0.01713493267695109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,2048,0.016104533274968465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,1536,0.01555519998073578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,512,0.015038933356602988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,1024,0.01529706617196401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,768,0.015128533045450846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,256,0.014481066664059957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,128,0.01446293294429779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,64,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,16384,0.02151040037473043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,64,32,0.014443733294804893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,12288,0.0202346662680308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,10240,0.02028800050417582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,5120,0.018593066930770875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,65536,0.03447893460591634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,8192,0.019578667481740315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,7168,0.020096000035603842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,6144,0.019403733809789023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,4096,0.017774933576583864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,2560,0.016795732577641807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,3072,0.016825600465138753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,3584,0.017434666554133095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,2048,0.015758933623631795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,1536,0.015599999825159708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,256,0.014607999722162882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,1024,0.01530026694138845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,768,0.015159466862678527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,512,0.014686933159828186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,128,0.014396799604098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,64,0.014402133226394654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,80,32,32,0.014197333653767904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,4096,0.08904746373494467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,6144,0.12659733295440673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,5120,0.10744640032450359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,7168,0.14302612940470377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,8192,0.16189440091451007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,3072,0.07184747060139975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,3584,0.07971519629160563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,2560,0.06215146780014038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,10240,0.19774293899536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,2048,0.054271999994913736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,1536,0.04514026641845703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,1024,0.0360213319460551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,512,0.025282132625579833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,256,0.021373866001764934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,768,0.03150293429692586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,12288,0.23284799257914224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,32,0.018131200472513834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,64,0.01805013418197632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,128,0.01900586684544881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,8192,0.05060693422953287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,6144,0.04389013449350993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,7168,0.046914132436116536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,10240,0.06378879944483438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,12288,0.07040106455485026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,5120,0.03824746608734131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,3072,0.02943146626154582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,65536,16384,0.3009610811869303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,16384,0.08546986579895019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,4096,0.03431573311487834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,3584,0.031439999739329025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,2560,0.027036799987157183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,1536,0.021549866596857706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,2048,0.024040534098943075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,768,0.018521600961685182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,1024,0.02004800041516622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,512,0.01675093372662862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,256,0.015614933768908181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,64,0.01578133304913839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,128,0.01536853313446045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,32,0.015703466534614564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,6144,0.03500159978866577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,8192,0.04123306671778361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,7168,0.03787306547164917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,10240,0.04755200147628784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,12288,0.05335466861724854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,16384,0.06595306793848674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,4096,0.028550400336583452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,5120,0.03177173336346944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,3584,0.02666986584663391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,2560,0.02320746580759684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,3072,0.024924800793329874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,1024,0.01825493375460307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,1536,0.019707733392715455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,2048,0.02138026754061381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,768,0.017240534226099648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,512,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,256,0.015438933173815408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,128,0.01520746648311615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,64,0.015225600202878317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,32,0.015451733271280924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,8192,0.03745493491490682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,10240,0.04256853262583415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,7168,0.03460586468378703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,12288,0.04761386712392171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,16384,0.05803093512852987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,6144,0.03177280028661092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,5120,0.029794132709503172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,4096,0.027052799860636394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,3072,0.023502933979034423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,3584,0.025092266003290814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,16384,65536,0.2967498779296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,2048,0.020922666788101195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,2560,0.02204906741778056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,1536,0.019293866554896035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,1024,0.017835734287897746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,256,0.015161599715550741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,768,0.016309332847595216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,512,0.015035733580589294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,128,0.014983466267585755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,64,0.015097600221633912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,12288,65536,0.2367466608683268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,32,0.015103999773661295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,10240,0.0385696013768514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,12288,0.04291306734085083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,6144,0.028828799724578857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,7168,0.033070933818817136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,8192,0.03473066488901774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,16384,0.05113813479741415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,5120,0.027194666862487792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,1536,0.018669867515563966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,3584,0.023235199848810832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,4096,0.024130133787790935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,3072,0.021988266706466676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,2560,0.020917334159215293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,2048,0.01976319948832194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,1024,0.016238933801651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,10240,65536,0.20406932830810548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,512,0.01573013365268707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,768,0.01574720044930776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,256,0.015230933825174967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,128,0.015159466862678527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,64,0.01532373329003652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,32,0.015458133816719056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,6144,0.026868265867233277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,7168,0.029285333553949994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,8192,0.031564799944559734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,10240,0.035290666421254474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,12288,0.03906986713409424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,16384,0.047226667404174805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,5120,0.025179733832677204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,8192,65536,0.16738346417744954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,4096,0.02313386599222819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,3584,0.0220960001150767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,3072,0.021179733673731486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,2560,0.020124799013137816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,1536,0.0181877334912618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,2048,0.019128533204396565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,64,0.014906666676203408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,1024,0.0161461333433787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,768,0.01596799989541372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,256,0.015102932850519816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,512,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,128,0.014793599645296732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,32,0.015166933337847391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,7168,0.0272053341070811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,10240,0.033137067159016924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,8192,0.029927466313044233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,12288,0.03626453479131063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,16384,0.043831467628479004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,6144,0.02550506591796875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,2560,0.019543466965357463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,3072,0.020428800582885744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,5120,0.02331413427988688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,4096,0.022114133834838866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,3584,0.02119999925295512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,7168,65536,0.1520448048909505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,2048,0.01872106591860453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,1536,0.01726933320363363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,1024,0.016190933187802632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,32,0.015075199802716575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,768,0.015575466553370157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,512,0.015511467059453329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,256,0.015058133006095886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,128,0.014813866217931113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,64,0.014915200074513755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,6144,0.023628799120585124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,7168,0.024973867336908977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,8192,0.02770773371060689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,16384,0.04490880171457927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,10240,0.0303658664226532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,12288,0.03337920109430949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,6144,65536,0.13464959462483722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,3072,0.01948053240776062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,5120,0.0221343994140625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,4096,0.020563199122746786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,3584,0.020054399967193604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,2560,0.018685867389043175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,2048,0.017914666732152303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,1024,0.015777066349983215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,1536,0.016430933276812235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,512,0.015078399578730264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,768,0.015582933028539022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,256,0.01488746702671051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,32,0.014957867066065469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,128,0.014883200327555338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,64,0.014837333559989929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,8192,0.024717867374420166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,10240,0.02752106587092082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,6144,0.022221867243448892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,7168,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,12288,0.030007465680440264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,16384,0.038882132371266684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,5120,65536,0.1175007979075114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,5120,0.021112533410390218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,4096,0.019874133666356406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,1024,0.015577600399653117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,3584,0.019375999768575035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,3072,0.018526933590571084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,2048,0.01699840029080709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,2560,0.017944532632827758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,1536,0.01586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,512,0.015309866269429526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,768,0.015493333339691162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,256,0.014872533082962037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,128,0.014882133404413859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,64,0.014656000336011252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,32,0.014781866470972696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,4096,65536,0.09946239789326985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,8192,0.027506132920583088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,7168,0.025939200321833295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,5120,0.02058560053507487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,16384,0.03752640088399251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,6144,0.02148266633351644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,12288,0.03205440044403076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,10240,0.029226666688919066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,3072,0.01836586594581604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,3584,0.01877440015474955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,2560,0.017467733224232992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,4096,0.01946773330370585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,2048,0.016561067104339598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,1024,0.015513599912325541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,1536,0.016060800353686015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,768,0.015470932920773825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,512,0.015033599734306336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,256,0.014763733744621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,128,0.014860799908638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,64,0.014788267016410828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,32,0.014828800161679586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3584,65536,0.08997120062510172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,10240,0.027422932783762614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,16384,0.035019731521606444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,8192,0.02325973312060038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,4096,0.01917546590169271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,7168,0.022039467096328737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,12288,0.02953280011812846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,6144,0.021134932835896812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,5120,0.02054826617240906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,2560,0.01805866758028666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,3584,0.018588799238204955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,3072,0.01752106746037801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,1536,0.016237866878509522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,1024,0.015397333105405173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,2048,0.01700693368911743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,768,0.01566933294137319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,512,0.015247999628384908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,256,0.014941866199175516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,128,0.014825600385665893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,64,0.0145578662554423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,32,0.014616533120473226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,3072,65536,0.08460799853006998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,12288,0.027847466866175334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,10240,0.02611946662267049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,16384,0.03188479940096538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,8192,0.02256106734275818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,7168,0.021626667181650797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,6144,0.020900267362594604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,3584,0.0187882661819458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,5120,0.02011093298594157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,4096,0.019036799669265747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,1024,0.01579093337059021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,3072,0.017914666732152303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,2048,0.01644373337427775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,1536,0.016109866897265117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,2560,0.016616533199946083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,512,0.015282133221626281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,768,0.015501866738001505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,256,0.014908799529075622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,64,0.014535466829935709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,65536,0.07668053309122722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,32,0.014707199732462563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2560,128,0.014805333813031516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,10240,0.02469866673151652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,12288,0.02574613293011983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,16384,0.029090134302775066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,8192,0.023229867219924927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,7168,0.022269866863886514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,4096,0.018235733111699425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,6144,0.021833600600560506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,5120,0.01959786613782247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,3584,0.017646932601928712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,3072,0.017223467429478966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,1536,0.016132266322771708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,2560,0.01676693360010783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,2048,0.016416000326474507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,1024,0.015415466825167336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,768,0.015477333466211954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,65536,0.06696853637695313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,128,0.014628266294797262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,512,0.015098667144775391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,256,0.014678399761517844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,64,0.014684800306955972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,2048,32,0.014731733004252115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,10240,0.023471999168395995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,16384,0.026526933908462523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,8192,0.02227733333905538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,7168,0.022014933824539184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,12288,0.024166399240493776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,6144,0.02130240003267924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,4096,0.018127999703089395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,5120,0.018917334079742432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,2560,0.016683733463287352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,3584,0.017771732807159425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,3072,0.017016534010569254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,2048,0.016318933169047038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,1536,0.015923200050989787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,1024,0.015763200322786965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,65536,0.05842026472091675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,512,0.015338666240374246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,768,0.0153546671072642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,256,0.01479039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,128,0.014565333724021912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,64,0.014677332838376364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1536,32,0.014521599809328715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,12288,0.022294400135676067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,16384,0.02434239983558655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,10240,0.021959465742111207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,4096,0.017895466089248656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,8192,0.02111999988555908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,6144,0.019843200842539467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,7168,0.02086826761563619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,5120,0.018321067094802856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,65536,0.04920533498128255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,3584,0.01729493339856466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,2560,0.01657386620839437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,3072,0.01692053278287252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,512,0.015326933066050211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,1536,0.01593493322531382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,2048,0.016424533724784852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,1024,0.015296000242233276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,768,0.015125333269437154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,128,0.014641066392262777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,256,0.01469546655813853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,32,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,1024,64,0.014651733636856078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,8192,0.02030186653137207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,12288,0.021458133061726888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,16384,0.02318293253580729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,10240,0.02095680038134257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,7168,0.02020053267478943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,6144,0.019988266626993816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,4096,0.017691733439763387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,5120,0.018552533785502114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,65536,0.04576319853464762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,3584,0.017338667313257852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,3072,0.017182934284210204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,2560,0.016421332955360413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,2048,0.01623679995536804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,1024,0.01535146633783976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,1536,0.01575040022532145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,768,0.015436800320943198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,512,0.015124266346295675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,256,0.014627200365066529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,128,0.014733866850535075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,64,0.014389333128929139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,16384,0.021713066101074218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,768,32,0.01443839967250824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,12288,0.020409599939982096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,5120,0.01867626706759135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,10240,0.019898666938145956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,65536,0.039827199776967366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,8192,0.019986132780710854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,7168,0.020275199413299562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,6144,0.019645865758260092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,4096,0.017595734198888144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,3072,0.016857600212097167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,3584,0.017400532960891724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,2560,0.016459733247756958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,2048,0.01635840038458506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,1536,0.015864533185958863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,1024,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,256,0.014870400230089823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,768,0.015010133385658264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,512,0.014892799655596414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,128,0.014697600404421488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,32,0.014531200130780539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,512,64,0.01453013320763906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,12288,0.019935999313990274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,8192,0.01978773276011149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,16384,0.021229867140452066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,10240,0.020182400941848755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,65536,0.03396799961725871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,4096,0.01779200037320455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,6144,0.01999573310216268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,7168,0.02007360061009725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,5120,0.018603734175364175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,3584,0.01733760039011637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,3072,0.017036799589792886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,2560,0.016390400131543477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,1536,0.01579093337059021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,1024,0.0151829332113266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,2048,0.0162581334511439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,768,0.01518186628818512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,512,0.015108266472816467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,32,0.014698666334152222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,256,0.014522666732470194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,128,0.01463573376337687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,256,64,0.01440000037352244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,8192,0.01986133257548014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,16384,0.021002666155497233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,12288,0.020049067338307698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,65536,0.031226666768391927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,10240,0.020196266969045005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,7168,0.02007360061009725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,6144,0.019511467218399046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,5120,0.018413867553075156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,4096,0.01772480010986328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,3584,0.017169066270192466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,2560,0.01646933356920878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,3072,0.01683626572291056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,512,0.014842666188875833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,2048,0.016337066888809204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,1536,0.015731199582417806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,1024,0.015312000115712484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,768,0.015056000153223673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,32,0.014660267035166421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,128,0.014451199769973755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,256,0.014524799585342408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,128,64,0.014413866400718688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,12288,0.01997973322868347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,10240,0.019678932428359986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,16384,0.020513067642847695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,8192,0.01942080060640971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,65536,0.029439999659856157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,7168,0.019694934288660683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,5120,0.018286933501561485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,6144,0.019294933478037516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,2560,0.016475733121236166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,4096,0.017681066195170084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,3584,0.016807466745376587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,1536,0.01565439999103546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,3072,0.01692906618118286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,2048,0.01632213294506073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,1024,0.015165866414705912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,768,0.015052800377209982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,512,0.014862933754920959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,64,0.014401066303253173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,256,0.014689067006111145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,128,0.014189866185188294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,64,32,0.01437013347943624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,8192,0.01932906707127889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,65536,0.030087467034657794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,10240,0.02016213337580363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,16384,0.020933334032694498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,12288,0.019769599040349327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,7168,0.019770665963490804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,6144,0.019037866592407228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,5120,0.018312533696492515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,4096,0.017437867323557534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,3072,0.01685439944267273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,3584,0.017206400632858276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,2048,0.01619733373324076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,2560,0.016375466187795003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,1536,0.01597866714000702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,1024,0.014973866939544677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,512,0.014834133783976236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,768,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,256,0.01481066644191742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,128,0.014546133081118264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,64,0.014286933342615762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,64,32,32,0.014311466614405313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,4096,0.0834773302078247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,5120,0.10017066796620686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,6144,0.1172223965326945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,7168,0.13417174021402994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,8192,0.14936960538228353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,10240,0.18305706977844238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,2048,0.051726933320363364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,3584,0.07481919924418132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,3072,0.06716907024383545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,2560,0.05863999923070272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,1536,0.042625065644582114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,768,0.029838933547337847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,512,0.024909865856170655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,1024,0.03666133483250936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,256,0.020326399803161622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,128,0.01834026575088501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,32,0.017748266458511353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,64,0.017407999436060587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,12288,0.2144597371419271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,8192,0.04907199939092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,6144,0.04330240090688069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,7168,0.04441386858622233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,10240,0.05654186805089315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,12288,0.06418879826863608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,65536,16384,0.281989320119222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,16384,0.08021439711252848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,5120,0.03648213148117065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,3584,0.030044800043106078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,4096,0.031981867551803586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,3072,0.028204800685246785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,1536,0.021388800938924153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,1024,0.01949866612752279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,2048,0.025095466772715253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,2560,0.02600746750831604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,768,0.018411733706792197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,512,0.015702399611473083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,64,0.015467733144760132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,128,0.015268266201019287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,256,0.015709867080052696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,32,0.015706666310628257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,8192,0.04210026661554973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,6144,0.035308798154195145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,7168,0.03854399919509888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,10240,0.04866346518198649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,12288,0.054788267612457274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,3072,0.025154133637746174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,16384,0.0648746649424235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,5120,0.03218773404757182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,3584,0.027011199792226152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,2560,0.02336639960606893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,4096,0.028983465830485028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,2048,0.02177600065867106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,1024,0.018552533785502114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,1536,0.01999573310216268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,768,0.017387733856836955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,256,0.015682133038838704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,512,0.015449600418408713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,128,0.015174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,64,0.015174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,32,0.015320533514022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,8192,0.036986664930979414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,10240,0.04351466496785482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,7168,0.03433599869410197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,12288,0.047312001387278244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,16384,0.058050131797790526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,6144,0.031642667452494305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,5120,0.029269333680470782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,16384,65536,0.28868694305419923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,3072,0.02323413292566935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,4096,0.025990400711695356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,3584,0.024220800399780272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,2048,0.02071466644605001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,2560,0.02178666591644287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,1024,0.01796906590461731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,1536,0.019953066110610963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,768,0.016268799702326454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,512,0.01551040013631185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,256,0.015564800302187601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,128,0.014987732966740927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,64,0.015242666999499003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,32,0.0151936004559199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,12288,65536,0.22959893544514975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,10240,0.03720853328704834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,12288,0.04122879902521769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,8192,0.03321173389752706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,7168,0.030587732791900635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,6144,0.029833600918451948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,16384,0.049142400423685714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,5120,0.026139734188715617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,3584,0.022728532552719116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,4096,0.023635200659434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,3072,0.021447465817133585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,2048,0.019359999895095827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,2560,0.020547199249267577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,1536,0.018398932615915933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,768,0.016099199652671814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,1024,0.016812799374262492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,10240,65536,0.19866773287455242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,64,0.015079466501871744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,512,0.015664000312487283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,32,0.015421866377194723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,256,0.015066666404406228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,128,0.014843733112017313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,8192,0.0318122665087382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,6144,0.026971733570098876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,7168,0.0287989338239034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,12288,0.03847039937973022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,16384,0.04875946839650472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,10240,0.03458666801452637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,3072,0.02104426622390747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,5120,0.025142399470011394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,4096,0.022667733828226726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,3584,0.0217141330242157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,2560,0.02005866765975952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,2048,0.01897066632906596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,8192,65536,0.16108585993448893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,1536,0.018179200092951455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,1024,0.016691199938456216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,768,0.01576640009880066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,512,0.015357866883277893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,256,0.015081600348154704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,32,0.015042133132616677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,128,0.014786133170127868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,64,0.014816000064214071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,8192,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,7168,0.02730453411738078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,6144,0.0250709335009257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,10240,0.03263253370920817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,12288,0.03585280179977417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,5120,0.023690666755040488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,16384,0.042903467019399004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,2048,0.01881813406944275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,7168,65536,0.15136000315348308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,2560,0.019688532749811808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,3584,0.020810665686925252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,4096,0.022038400173187256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,3072,0.020564266045888267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,1024,0.01597866714000702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,1536,0.01687999963760376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,768,0.015109333395957946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,512,0.015337600310643514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,32,0.015007999539375306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,256,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,128,0.014877866705258688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,64,0.015025066335995993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,6144,0.023563732703526817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,10240,0.029752532641092937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,7168,0.024785067637761435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,12288,0.03251413305600484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,8192,0.026971733570098876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,16384,0.04262719949086507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,4096,0.02061333258946737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,6144,65536,0.12870506445566815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,5120,0.022117332617441813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,3072,0.01921066641807556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,1536,0.016214399536450704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,2048,0.018122667074203493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,3584,0.019828265905380248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,2560,0.018684800465901694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,1024,0.016013866662979125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,256,0.014878933628400167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,768,0.015638400117556253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,128,0.014866133530934652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,512,0.015236266454060874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,32,0.014919466773668923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,64,0.014830933014551798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,16384,0.037333333492279054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,7168,0.02613866726557414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,6144,0.024959999322891235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,10240,0.02988586624463399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,8192,0.027289599180221558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,12288,0.03231893380482991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,5120,65536,0.11097599665323894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,5120,0.02068159977595011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,4096,0.019719467560450236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,3072,0.01880853374799093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,3584,0.01912213365236918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,1024,0.01569386621316274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,2560,0.017744000752766928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,2048,0.01722773313522339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,1536,0.016296533743540446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,128,0.014638933539390563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,512,0.015285332997639975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,768,0.01544319987297058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,256,0.01480959951877594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,64,0.014759467045466105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,32,0.015110400319099427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,4096,65536,0.09415679772694906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,8192,0.02639893293380737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,16384,0.035507198174794516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,6144,0.024486400683720908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,7168,0.025029333432515462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,5120,0.020427733659744263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,12288,0.030834132432937623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,10240,0.02831679979960124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,2048,0.016350932916005454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,3584,0.018984532356262206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,4096,0.019234132766723634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,1536,0.01618666648864746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,3072,0.0183786670366923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,2560,0.017037866512934367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,1024,0.015503999590873719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,768,0.015426133076349893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,512,0.015092266599337259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,128,0.014765866597493491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,256,0.014680533607800802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,64,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,32,0.014631467064221701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3584,65536,0.08898986975351969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,16384,0.03358506759007772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,10240,0.027261867125829058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,12288,0.029357866446177168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,6144,0.023669334252675374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,7168,0.024621866146723428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,8192,0.025461333990097045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,5120,0.019985065857569376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,3072,0.018202666441599527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,4096,0.018870399395624796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,3584,0.018953599532445273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,2048,0.016659200191497803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,2560,0.016755199432373045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,1536,0.016089600324630738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,768,0.015346133708953857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,1024,0.015852800011634825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,512,0.015348266561826071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,256,0.014862933754920959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,64,0.014674133062362671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,65536,0.07953706582387289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,128,0.01471573313077291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,3072,32,0.014871467153231302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,12288,0.02691733241081238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,10240,0.025566933552424113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,4096,0.018645334243774413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,8192,0.02403093377749125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,16384,0.03075733383496602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,7168,0.023248000939687093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,6144,0.02248106598854065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,5120,0.019821866353352865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,3072,0.017547732591629027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,3584,0.01813760002454122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,2560,0.01660693287849426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,2048,0.016389333208402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,512,0.015184000134468079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,1536,0.015980799992879234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,1024,0.015715199708938598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,768,0.015403733650843302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,65536,0.07141226927439372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,256,0.014827733238538107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,128,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,32,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2560,64,0.014538666605949402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,6144,0.020272000630696615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,12288,0.024915200471878052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,7168,0.02071466644605001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,10240,0.0245088001092275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,16384,0.028123732407887774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,8192,0.02326186696688334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,5120,0.019371734062830607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,4096,0.01808746655782064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,3584,0.018267732858657838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,3072,0.017780266205469766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,2560,0.01672640045483907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,2048,0.016454399625460307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,1536,0.01606826682885488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,768,0.015287466843922935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,1024,0.015706666310628257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,65536,0.062209065755208334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,128,0.01455573340257009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,256,0.01467626690864563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,512,0.014997333288192749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,64,0.01474453310171763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,2048,32,0.014882133404413859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,10240,0.02256106734275818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,16384,0.025880533456802367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,12288,0.023538132508595787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,8192,0.021692800521850585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,7168,0.0212991992632548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,4096,0.01792853275934855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,6144,0.02002453406651815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,5120,0.01914880077044169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,3584,0.017219199736913045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,2560,0.016489600141843162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,65536,0.05426026582717895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,1536,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,3072,0.016927999258041383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,2048,0.016404267152150473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,512,0.014958932995796204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,1024,0.015602133671442666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,768,0.015380266308784484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,64,0.014567466576894126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,128,0.014702933033307395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,256,0.014633599917093912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1536,32,0.014669866363207499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,8192,0.02150719960530599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,7168,0.021128533283869426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,16384,0.024280534187952677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,6144,0.019564799467722573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,12288,0.023502933979034423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,10240,0.022395733992258707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,5120,0.018599466482798258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,4096,0.017758933703104655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,3584,0.01729066570599874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,3072,0.017151999473571777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,2560,0.016453333695729575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,65536,0.045134933789571126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,2048,0.016506666938463845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,1536,0.015982932845751443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,768,0.015235199530919393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,1024,0.015415466825167336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,512,0.015077333648999533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,256,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,128,0.014341333508491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,64,0.014539733529090881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,1024,32,0.014735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,16384,0.02290346622467041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,10240,0.021051732699076335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,12288,0.021160533030827842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,8192,0.02138239940007528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,3584,0.017308799425760905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,7168,0.019700266917546592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,4096,0.017778132359186807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,6144,0.019767467180887857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,65536,0.04151466687520345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,5120,0.01849173307418823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,3072,0.01697493394215902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,2048,0.01623146633307139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,2560,0.01634666621685028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,1024,0.015658666690190635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,1536,0.015703466534614564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,768,0.015170133113861084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,256,0.014657066265741984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,512,0.0150218665599823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,64,0.014726400375366211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,128,0.014460800091425577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,768,32,0.01460906664530436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,7168,0.01999359925587972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,16384,0.021912533044815063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,12288,0.0206389327843984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,10240,0.020504534244537354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,8192,0.019884800910949706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,65536,0.035973334312438966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,6144,0.019427200158437095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,4096,0.01739733417828878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,5120,0.018244266510009766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,3584,0.01717653274536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,2560,0.016359466314315795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,3072,0.016965333620707193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,768,0.015141333142916361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,2048,0.016340266664822897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,1536,0.015773866573969522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,1024,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,512,0.014727466305096946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,32,0.014607999722162882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,256,0.014574933052062988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,128,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,512,64,0.01461013356844584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,16384,0.020489599307378134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,8192,0.019378133614857993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,65536,0.031222399075826007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,12288,0.019636267423629762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,10240,0.020029866695404054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,7168,0.01953493356704712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,6144,0.019396267334620156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,3584,0.017118932803471883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,5120,0.018343466520309448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,4096,0.01766506632169088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,2560,0.016149333119392394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,3072,0.016936532656351724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,1536,0.015542399883270264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,2048,0.01609173317750295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,768,0.015147733688354491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,1024,0.015471999843915304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,256,0.014505599935849508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,512,0.014929067095120749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,128,0.014341333508491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,32,0.014341333508491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,256,64,0.014613333344459533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,16384,0.020242132743199668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,10240,0.01971413294474284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,65536,0.028913066784540815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,12288,0.019771732886632285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,8192,0.01938026746114095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,7168,0.019756799936294554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,6144,0.019049600760142008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,4096,0.017401599884033205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,5120,0.01841493248939514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,3584,0.01723946730295817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,3072,0.016902399063110352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,2560,0.0164192001024882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,2048,0.016125866770744325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,1024,0.015127467115720114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,256,0.014512000481287637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,768,0.01513706644376119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,1536,0.01574186682701111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,512,0.014817066987355552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,128,0.014359466234842935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,16384,0.02014933427174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,32,0.014494933684666953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,128,64,0.01456106702486674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,12288,0.019757866859436035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,10240,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,65536,0.026754132906595868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,8192,0.01932906707127889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,6144,0.019167999426523842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,7168,0.0195850670337677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,3584,0.017132800817489625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,5120,0.01815999945004781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,4096,0.017476266622543334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,2560,0.01616426706314087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,3072,0.01667840083440145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,2048,0.016189866264661155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,1536,0.015705600380897522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,768,0.015012266238530478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,1024,0.015292800466219583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,512,0.014705066879590353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,256,0.014628266294797262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,128,0.014216533303260804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,16384,0.02039253314336141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,64,0.014309333761533103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,64,32,0.014669866363207499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,65536,0.027134933074315387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,12288,0.019618133703867592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,6144,0.019476266702016194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,7168,0.01935466726620992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,10240,0.019639466206232706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,8192,0.01927893360455831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,5120,0.018244266510009766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,4096,0.017612799008687337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,3584,0.017163733641306557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,2560,0.016498133540153503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,3072,0.01662506659825643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,2048,0.015989333391189575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,1536,0.015601066748301187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,1024,0.015313067038853965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,128,0.014426666498184203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,768,0.015031466881434122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,256,0.014417066176732381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,512,0.014800000190734863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,64,0.014341333508491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,48,32,32,0.01437226633230845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,4096,0.08261760075887045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,5120,0.09879786968231201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,6144,0.11599146525065104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,7168,0.13083626429239908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,8192,0.1484447956085205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,2048,0.05026559829711914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,3584,0.0735210657119751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,10240,0.1829482714335124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,2560,0.05752213398615519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,3072,0.06588586568832397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,512,0.02523946762084961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,1536,0.041679998238881424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,1024,0.033470932642618814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,256,0.019896533091862997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,128,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,768,0.02911253372828166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,64,0.017087999979654947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,12288,0.21346346537272134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,32,0.017193599541982015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,8192,0.04835093418757121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,6144,0.03945706685384114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,7168,0.04686400095621745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,10240,0.055371733506520596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,12288,0.0635477344195048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,65536,16384,0.27906986872355144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,5120,0.03607999881108602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,16384,0.07945813337961832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,4096,0.03150399923324585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,3584,0.02987733284632365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,3072,0.02754666606585185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,1536,0.021248000860214233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,2560,0.02555946707725525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,1024,0.019234132766723634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,128,0.015103999773661295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,2048,0.024238934119542442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,768,0.018119466304779053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,512,0.015921066204706825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,256,0.015307733416557312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,64,0.015214932958285013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,32,0.015460266669591268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,8192,0.04116586844126384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,10240,0.04865920146306356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,12288,0.052774401505788174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,7168,0.03796586592992147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,6144,0.035011200110117595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,16384,0.0682207981745402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,2560,0.02327573299407959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,5120,0.03157973289489746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,3584,0.026217599709828694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,4096,0.028804266452789308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,3072,0.02487893303235372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,2048,0.021517866849899293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,1024,0.01813760002454122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,1536,0.0200437327226003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,512,0.015717333555221556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,768,0.016874667008717856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,256,0.01529706617196401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,128,0.01503679951032003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,64,0.014924800395965577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,32,0.015227733055750528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,7168,0.03410346508026123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,8192,0.03654079834620158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,16384,65536,0.2801941235860189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,12288,0.04708586533864339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,10240,0.04198720057805379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,16384,0.05769386688868204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,4096,0.026971733570098876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,3072,0.022696532805760703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,2560,0.021511467297871907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,5120,0.028549333413441975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,3584,0.024153600136439003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,6144,0.03127786715825399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,2048,0.02034133275349935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,1536,0.019179733594258626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,128,0.014740266402562461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,1024,0.01767786741256714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,768,0.015609600146611533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,512,0.015481600165367126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,256,0.015307733416557312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,12288,65536,0.2260682741800944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,32,0.014876799782117209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,64,0.015046399831771851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,7168,0.029978666702906293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,8192,0.034167468547821045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,10240,0.03669120073318481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,6144,0.027801599105199176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,12288,0.04071040153503418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,16384,0.048842668533325195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,5120,0.02560960054397583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,4096,0.023447465896606446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,3072,0.021268266439437866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,3584,0.02236479918162028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,2560,0.020320000251134236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,2048,0.019211733341217042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,1536,0.018357332547505698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,1024,0.016116266449292503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,10240,65536,0.19148586591084799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,768,0.015288533767064414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,512,0.015495466192563376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,256,0.015100799997647605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,32,0.015153066317240397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,64,0.015024000406265258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,128,0.014710399508476257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,7168,0.02846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,8192,0.03139839967091878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,6144,0.026333866516749065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,10240,0.03444799979527791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,12288,0.037928533554077146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,4096,0.022524799903233847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,16384,0.045287466049194335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,3584,0.02152106761932373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,5120,0.024456532796223958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,3072,0.02074986696243286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,2560,0.01990933418273926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,2048,0.019132800896962485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,8192,65536,0.1548202673594157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,1536,0.01775146722793579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,1024,0.015212800105412802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,128,0.014673067132631936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,768,0.015358933806419372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,512,0.015187199910481772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,256,0.015179733435312906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,64,0.015017599860827128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,32,0.014886400103569031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,12288,0.03565013408660889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,8192,0.028883200883865357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,10240,0.0324127991994222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,7168,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,6144,0.025221333901087446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,16384,0.04216959873835246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,5120,0.023427200317382813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,7168,65536,0.1419178644816081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,4096,0.02210879921913147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,2048,0.01853013237317403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,3584,0.02127573291460673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,3072,0.019879466295242308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,2560,0.01938986579577128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,1536,0.017139200369517008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,512,0.015316266814867655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,1024,0.015200000007947287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,768,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,128,0.014569600423177084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,256,0.014941866199175516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,64,0.01495146652062734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,32,0.014881066481272378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,6144,0.023060266176859537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,10240,0.028780800104141236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,5120,0.02170133392016093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,8192,0.026422399282455444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,7168,0.02437653342882792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,12288,0.03184319933255513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,16384,0.04107093413670858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,6144,65536,0.12046186923980713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,3584,0.019921066363652547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,4096,0.020473599433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,3072,0.01946453253428141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,2560,0.01853546698888143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,2048,0.017692800362904867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,768,0.015427199999491372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,1536,0.01616106629371643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,1024,0.015336533387502035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,32,0.014791466792424521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,512,0.015006933609644571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,256,0.01495039959748586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,128,0.01460693379243215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,64,0.014963199694951376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,7168,0.02529279987017314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,6144,0.024251733223597208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,5120,65536,0.10696746508280437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,12288,0.0313482662041982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,16384,0.03570559819539388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,8192,0.02637653350830078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,10240,0.028830933570861816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,3072,0.018552533785502114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,3584,0.01907306710879008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,5120,0.020438400904337566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,4096,0.019640533129374187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,2560,0.01765226721763611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,1536,0.015336533387502035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,2048,0.01625706652800242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,1024,0.015503999590873719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,768,0.015386666854222616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,512,0.014969600240389505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,256,0.014916266997655234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,128,0.014307199915250143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,64,0.014806399742762247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,32,0.01492693324883779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,4096,65536,0.0884447971979777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,16384,0.03431040048599243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,8192,0.025518933931986492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,7168,0.024514132738113405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,6144,0.023534933725992836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,10240,0.027526400486628216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,12288,0.02956266601880391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,5120,0.02026240030924479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,4096,0.019156267245610557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,768,0.015369600057601929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,3584,0.018849066893259683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,3072,0.01853546698888143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,2560,0.017518933614095053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,2048,0.015785599748293556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,1536,0.016115199526151022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,1024,0.015655466914176942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,512,0.01530239979426066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,65536,0.0858400026957194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,256,0.014728533228238425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,128,0.0145578662554423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,64,0.014739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3584,32,0.014855466286341348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,10240,0.026469333966573076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,12288,0.028113067150115967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,16384,0.032781867186228435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,8192,0.024889600276947022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,7168,0.02350613276163737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,5120,0.019896533091862997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,6144,0.02292799949645996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,4096,0.01885333259900411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,3584,0.018331732352574667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,3072,0.01798506577809652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,2560,0.016541866461435954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,2048,0.015735466281572977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,768,0.015244799852371215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,1536,0.01593280037244161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,1024,0.015503999590873719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,512,0.015110400319099427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,32,0.014657066265741984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,256,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,128,0.01443839967250824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,64,0.014575999975204468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,3072,65536,0.07490560213724771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,10240,0.024689066410064697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,12288,0.026216532786687213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,7168,0.02241706649462382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,16384,0.029663999875386555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,8192,0.023441066344579063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,6144,0.021964800357818604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,5120,0.019406932592391967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,4096,0.01880426605542501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,3584,0.018191999197006224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,3072,0.01727679967880249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,768,0.01539413332939148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,1536,0.01577279965082804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,2048,0.01618666648864746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,2560,0.016665599743525186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,1024,0.01566933294137319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,512,0.0150709331035614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,65536,0.06666560173034668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,128,0.014506666858990987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,256,0.014503467082977294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,64,0.014613333344459533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2560,32,0.014782933394114175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,12288,0.024320000410079957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,8192,0.022562134265899658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,10240,0.02398186723391215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,7168,0.021266132593154907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,16384,0.027083732684453327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,6144,0.021185066302617392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,3584,0.017051732540130614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,5120,0.019297067324320474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,4096,0.017707733313242595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,3072,0.016684800386428833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,2560,0.016169599692026772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,2048,0.01618346671263377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,1024,0.015413332978884378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,65536,0.05832853317260742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,1536,0.015868799885114034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,512,0.015220266580581666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,768,0.015195733308792115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,128,0.014337066809336343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,32,0.014733866850535075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,256,0.014858667055765787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,2048,64,0.014828800161679586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,12288,0.022964266935984294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,16384,0.024810665845870973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,7168,0.021026132504145305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,10240,0.022286933660507203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,8192,0.021041067441304524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,6144,0.019910399119059244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,5120,0.018049067258834837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,3072,0.01688533425331116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,4096,0.017847466468811034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,3584,0.017520000537236534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,2048,0.01586560010910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,65536,0.05005120038986206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,2560,0.016594133774439492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,768,0.015131733814875283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,1536,0.015923200050989787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,1024,0.015455999970436096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,512,0.01507306694984436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,64,0.014456533392270408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,128,0.014410666624704995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,256,0.01474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1536,32,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,10240,0.021266132593154907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,16384,0.023412267367045082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,12288,0.021010132630666097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,4096,0.01711146632830302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,8192,0.020041600863138834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,7168,0.019908267259597778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,6144,0.018651733795801796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,5120,0.018057600657145182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,65536,0.04081386725107829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,2560,0.016540799538294473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,3584,0.017332265774408974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,3072,0.01689173380533854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,2048,0.01588053305943807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,1536,0.015656532843907674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,256,0.014620799819628397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,1024,0.015250133474667868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,768,0.015046399831771851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,512,0.014765866597493491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,32,0.01454080045223236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,128,0.014578133821487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,1024,64,0.014544000228246054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,16384,0.02182719906171163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,8192,0.019042134284973145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,12288,0.02081706722577413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,10240,0.019747199614842732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,7168,0.01955733299255371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,6144,0.01945599913597107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,5120,0.018220800161361694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,4096,0.017477333545684814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,3584,0.017204266786575318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,65536,0.037197868029276535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,3072,0.01684479912122091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,2048,0.01586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,2560,0.016492799917856852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,512,0.014728533228238425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,768,0.01511679987112681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,1536,0.01574720044930776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,1024,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,256,0.014426666498184203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,64,0.014524799585342408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,128,0.014318933089574179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,16384,0.02076479991277059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,768,32,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,12288,0.01989439924558004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,10240,0.019817600647608437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,8192,0.01888426740964254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,5120,0.018202666441599527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,65536,0.032103466987609866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,7168,0.019633066654205323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,6144,0.019244800011316933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,4096,0.01763520042101542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,2048,0.016251732905705772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,3584,0.01729066570599874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,3072,0.016774400075276693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,2560,0.01627840002377828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,1536,0.015759999553362526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,1024,0.01527679959932963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,768,0.014839466412862143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,256,0.014614400267601014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,512,0.014800000190734863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,128,0.014533332983652749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,64,0.01453439990679423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,512,32,0.014327466487884521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,10240,0.019980800151824952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,16384,0.020038400093714395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,65536,0.02837013403574626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,12288,0.019143466154734293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,8192,0.019128533204396565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,4096,0.017663999398549398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,7168,0.019578667481740315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,6144,0.019324799378712974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,5120,0.01788160006205241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,3072,0.016642133394877114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,3584,0.01732053359349569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,2560,0.01635199983914693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,2048,0.015999999642372132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,1536,0.01551466683546702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,1024,0.015381333231925965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,512,0.014597333470980325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,256,0.014641066392262777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,768,0.015067733327547708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,128,0.014427733421325684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,64,0.014377599954605103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,256,32,0.014570666352907815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,10240,0.01953386664390564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,65536,0.025997867186864216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,16384,0.020092799266179403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,12288,0.019180800517400107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,8192,0.018953599532445273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,7168,0.019367466370264687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,4096,0.017544533809026083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,6144,0.018838399648666383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,3584,0.01728746692339579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,5120,0.018227199713389076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,3072,0.016778665781021117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,2560,0.01601066688696543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,1024,0.015191466609636942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,2048,0.015874133507410685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,1536,0.015570132931073507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,512,0.015058133006095886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,768,0.015030399958292643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,256,0.01467626690864563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,128,0.014455466469128927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,32,0.01441493332386017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,128,64,0.014225066701571146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,12288,0.019357866048812865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,16384,0.019719467560450236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,65536,0.0247871994972229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,10240,0.01938026746114095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,6144,0.018902399142583213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,8192,0.018862932920455933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,7168,0.01935466726620992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,5120,0.017911465962727864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,4096,0.017432532707850137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,3072,0.016661333044370015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,2048,0.015874133507410685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,3584,0.017095466454823814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,2560,0.01623253325621287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,1536,0.015662933389345803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,768,0.014962133765220643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,1024,0.01524906655152639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,128,0.014289066195487976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,512,0.015059199929237366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,256,0.014514133334159851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,64,0.014361600081125895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,64,32,0.01436906655629476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,10240,0.019130667050679527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,8192,0.0188426673412323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,16384,0.019435733556747437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,12288,0.018992000818252565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,65536,0.024972800413767496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,7168,0.018987733125686645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,6144,0.01871573328971863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,4096,0.01760853330294291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,3584,0.01704960068066915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,5120,0.018090667327245076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,3072,0.01676693360010783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,2560,0.016360533237457276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,1536,0.015552000204722086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,2048,0.015940266847610473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,768,0.01487573285897573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,512,0.014995200435320535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,1024,0.015266133348147073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,256,0.014524799585342408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,128,0.014158933361371358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,64,0.014491732915242514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,32,32,32,0.014358400305112203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,4096,0.08074986934661865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,5120,0.09734079837799073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,6144,0.11423359711964924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,7168,0.12921280066172283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,8192,0.14734080632527669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,10240,0.18081386884053546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,3584,0.07225279808044434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,3072,0.0652234673500061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,2560,0.05640213489532471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,2048,0.04917546510696411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,12288,0.21119146347045897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,1536,0.040532267093658446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,1024,0.03466240167617798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,512,0.023626667261123658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,256,0.019293866554896035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,768,0.02814720074335734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,64,0.015732266505559287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,128,0.016269866625467935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,32,0.016569599509239197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,8192,0.0469322681427002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,6144,0.03922986586888631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,7168,0.04301120042800903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,10240,0.05444480180740356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,65536,16384,0.2771231969197591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,12288,0.06191786527633667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,5120,0.035344000657399496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,16384,0.0773248036702474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,4096,0.03126399914423625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,3584,0.02949013312657674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,1536,0.02100693384806315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,3072,0.027548799912134807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,2560,0.026368000109990436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,2048,0.022693334023157756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,256,0.015315199891726175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,1024,0.019053866465886436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,768,0.018090667327245076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,512,0.01550933321317037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,128,0.015118933717409768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,64,0.015200000007947287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,32,0.014889599879582724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,6144,0.034434131781260174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,7168,0.037882665793101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,8192,0.040611199537913006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,10240,0.045741868019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,12288,0.053907199700673425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,16384,0.06652160088221232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,2560,0.023129600286483764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,5120,0.03094826738039653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,3584,0.02632746696472168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,3072,0.02431360085805257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,4096,0.028219733635584516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,2048,0.021333332856496176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,1536,0.01960960030555725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,1024,0.018103466431299845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,768,0.016743467251459757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,512,0.015290666619936624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,256,0.01513920029004415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,128,0.014944000045458474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,64,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,32,0.01474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,16384,65536,0.27801812489827477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,10240,0.041266131401062014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,12288,0.04599146842956543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,7168,0.033402665456136064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,8192,0.03588159879048665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,16384,0.06103253364562988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,6144,0.033090132474899295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,3072,0.022658133506774904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,2560,0.021362133820851645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,5120,0.028152533372243244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,4096,0.02548266649246216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,3584,0.0236629327138265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,2048,0.020439465840657554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,12288,65536,0.21513813336690268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,1536,0.018972800175348917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,1024,0.017399466037750243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,768,0.015316266814867655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,128,0.01483626663684845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,512,0.015205333630243937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,256,0.014803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,64,0.014811733365058899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,32,0.014882133404413859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,7168,0.02944213350613912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,8192,0.031386667490005495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,12288,0.0394538680712382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,10240,0.035427200794219973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,6144,0.02765013376871745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,16384,0.04763840039571126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,3584,0.022282665967941283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,5120,0.02531840006510417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,3072,0.021217066049575805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,2560,0.020076799392700195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,4096,0.022731733322143555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,1536,0.018001067638397216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,2048,0.01919680039087931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,1024,0.01583146651585897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,768,0.015493333339691162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,512,0.015359999736150107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,128,0.014708266655604044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,10240,65536,0.18549013137817383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,256,0.014862933754920959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,64,0.01498240033785502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,32,0.014858667055765787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,6144,0.026395734151204425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,8192,0.03256640036900838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,7168,0.028140799204508467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,12288,0.04036266803741455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,10240,0.038091735045115156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,16384,0.0477567990620931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,3072,0.020758400360743202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,5120,0.02416426738103231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,4096,0.022013866901397706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,3584,0.02160960038503011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,2560,0.019377066691716512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,8192,65536,0.14864640235900878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,1536,0.017755732933680216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,2048,0.01882879932721456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,1024,0.015333333611488342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,512,0.01530239979426066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,768,0.015331199765205384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,256,0.014869333306948344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,64,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,128,0.014727466305096946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,32,0.014838400483131408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,8192,0.028193066517512005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,7168,0.02585066755612691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,6144,0.024966400861740113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,10240,0.031175466378529866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,12288,0.03396373192469279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,5120,0.023078399896621703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,16384,0.04076799949010213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,4096,0.021040000518163047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,2048,0.01831573247909546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,7168,65536,0.13596906661987304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,3584,0.020948266983032225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,2560,0.018850133816401164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,3072,0.020105600357055664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,768,0.01546346644560496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,1536,0.016263467073440552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,1024,0.015081600348154704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,512,0.014954666296641031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,32,0.014722133676211039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,256,0.014726400375366211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,128,0.014532267053922018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,64,0.014739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,6144,0.022859734296798707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,10240,0.028843732674916585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,8192,0.025987199942270917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,7168,0.024242132902145386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,6144,65536,0.11623146533966064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,16384,0.03922666708628337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,12288,0.03086293339729309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,4096,0.020849066972732543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,5120,0.021605332692464195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,2560,0.018373332420984902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,3584,0.019785600900650024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,3072,0.01925546725591024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,2048,0.017650133371353148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,1024,0.015556266903877259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,1536,0.01560426652431488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,768,0.015161599715550741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,32,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,512,0.015082666277885437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,256,0.014737066626548768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,128,0.014535466829935709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,64,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,7168,0.02482453385988871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,6144,0.024026666084925333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,5120,65536,0.10070933500925701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,12288,0.0302346666653951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,10240,0.028123732407887774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,8192,0.025873066981633504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,16384,0.03447573184967041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,3072,0.018347734212875368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,5120,0.02029119928677877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,2560,0.01765120029449463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,4096,0.01950826644897461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,3584,0.019008000691731773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,2048,0.016498133540153503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,1536,0.015601066748301187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,1024,0.015539200107256571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,512,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,768,0.015100799997647605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,256,0.014694399634997048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,128,0.014824533462524414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,64,0.014735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,32,0.01474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,4096,65536,0.08319466908772787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,16384,0.032849067449569704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,8192,0.025118933121363325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,7168,0.0243231991926829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,12288,0.028501333793004353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,10240,0.026873600482940675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,6144,0.023205333948135377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,5120,0.02009493311246236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,4096,0.019160532951354982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,768,0.015054933230082192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,3584,0.018651733795801796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,3072,0.018222934007644652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,2560,0.016810667514801026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,2048,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,1536,0.015770666797955833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,1024,0.015355733036994935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,512,0.01498240033785502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,65536,0.08052266438802083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,256,0.014774399995803832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,128,0.014633599917093912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,64,0.014541866381963095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3584,32,0.014710399508476257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,10240,0.025252266724904375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,12288,0.02738986611366272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,16384,0.03107733329137166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,7168,0.023510400454203287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,8192,0.024426666895548503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,6144,0.022410666942596434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,5120,0.019581866264343262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,4096,0.018969599405924478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,3584,0.018490666151046754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,3072,0.01780479947725932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,2560,0.01665066679318746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,2048,0.01651413341363271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,1536,0.015692800283432007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,1024,0.01504533290863037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,768,0.015172266960144043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,512,0.015065600474675497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,65536,0.06942400137583414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,256,0.014745600024859109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,128,0.014629333217938741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,64,0.014773333072662353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,3072,32,0.014332800110181173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,12288,0.02560746669769287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,16384,0.028806400299072266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,4096,0.018320000171661376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,8192,0.02313813368479411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,10240,0.02412373423576355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,7168,0.02232853372891744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,6144,0.021828265984853108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,5120,0.019374932845433554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,3072,0.016910932461420693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,3584,0.017474132776260375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,2560,0.01649066706498464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,2048,0.01607146660486857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,1024,0.015369600057601929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,1536,0.015686399737993875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,768,0.015094400445620219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,512,0.014944000045458474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,65536,0.06239466667175293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,256,0.014595199624697366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,128,0.014727466305096946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,64,0.014424533645311991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2560,32,0.014723199605941772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,12288,0.023804799715677897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,8192,0.021800533930460612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,10240,0.022999467452367146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,16384,0.025869866212209065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,4096,0.018076799313227334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,3584,0.01699733336766561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,7168,0.021498666206995646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,6144,0.020725333690643312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,5120,0.01925546725591024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,3072,0.016277333100636802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,2560,0.01616426706314087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,1536,0.015642666816711427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,2048,0.0160778671503067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,65536,0.052892800172170004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,1024,0.015217066804567973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,768,0.015056000153223673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,512,0.015020799636840821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,256,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,64,0.014734933773676554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,32,0.01453013320763906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,2048,128,0.014482133587201438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,12288,0.02251946727434794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,16384,0.02410773237546285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,7168,0.02081706722577413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,10240,0.02182506720225016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,8192,0.021121066808700562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,6144,0.019373865922292073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,5120,0.017463467518488564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,4096,0.0172437330087026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,1536,0.015520000457763672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,3072,0.016577066977818807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,3584,0.017172267039616905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,65536,0.04654186566670736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,2048,0.016085333625475564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,2560,0.016221867005030314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,1024,0.015102932850519816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,512,0.014897066354751586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,256,0.014519466956456503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,768,0.014932266871134438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,128,0.014697600404421488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,64,0.014298666516939798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1536,32,0.014594133694966635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,12288,0.020911999543507895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,10240,0.020602667331695558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,16384,0.022435200214385987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,8192,0.020170666774113975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,7168,0.019054933389027914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,6144,0.01922986706097921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,5120,0.017998933792114258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,65536,0.03748053312301636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,3584,0.017233065764109292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,4096,0.017309866348902383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,3072,0.016722132762273155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,512,0.01482133368651072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,2560,0.016217600305875143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,2048,0.0167797327041626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,1536,0.01548373301823934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,1024,0.015065600474675497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,768,0.015211733182271323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,256,0.014579199751218162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,64,0.014707199732462563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,128,0.014416000247001648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,1024,32,0.01453013320763906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,7168,0.019450666507085164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,12288,0.020207999149958293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,16384,0.021540266275405884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,8192,0.019262933731079103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,10240,0.01919040083885193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,6144,0.018885332345962524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,4096,0.017545600732167564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,65536,0.03396799961725871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,5120,0.018088533480962118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,2560,0.01609813372294108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,3584,0.01700693368911743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,3072,0.0166293332974116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,2048,0.01602133313814799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,1024,0.015381333231925965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,1536,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,256,0.014510933558146158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,768,0.015019733707110086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,512,0.014838400483131408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,128,0.014459733168284097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,64,0.01455893317858378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,768,32,0.014633599917093912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,12288,0.019157334168752035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,16384,0.01967573364575704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,65536,0.02898453275362651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,10240,0.01919680039087931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,8192,0.018942934274673463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,7168,0.019205333789189656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,6144,0.019057067235310872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,4096,0.01732586622238159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,5120,0.01826559901237488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,3584,0.016986666123072307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,768,0.014940800269444785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,3072,0.016863999764124553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,2560,0.016254933675130208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,2048,0.015989333391189575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,1536,0.015339733163515726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,1024,0.015100799997647605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,512,0.014961066842079162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,128,0.014564266800880432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,256,0.01448319951693217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,64,0.014670933286348978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,512,32,0.014595199624697366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,16384,0.019106133778889974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,12288,0.019078399737675986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,10240,0.019335466623306274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,65536,0.025098667542139692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,8192,0.01883093317349752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,6144,0.01890666683514913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,7168,0.019374932845433554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,5120,0.018119466304779053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,4096,0.01722666621208191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,2048,0.01596799989541372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,1536,0.015553067127863566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,3584,0.01699413259824117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,3072,0.016794667641321818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,2560,0.01614720026652018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,1024,0.01497706671555837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,768,0.014941866199175516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,128,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,256,0.014532267053922018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,512,0.014765866597493491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,32,0.014621866742769876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,256,64,0.01439573367436727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,65536,0.022834134101867676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,16384,0.018962132930755615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,10240,0.019115734100341796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,8192,0.018552533785502114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,12288,0.01885226567586263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,7168,0.01912533243497213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,6144,0.018784000476201376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,5120,0.01786880095799764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,3072,0.016504533092180886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,4096,0.0171509325504303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,3584,0.016641066471735636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,2560,0.016099199652671814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,1536,0.01539306640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,2048,0.015945600469907124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,768,0.01477226714293162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,1024,0.015155200163523355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,512,0.01477120021979014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,64,0.014411733547846476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,256,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,128,0.014203733205795288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,128,32,0.014378666877746582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,10240,0.01903146704037984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,8192,0.018399999539057414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,65536,0.02145706613858541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,16384,0.018959999084472656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,12288,0.018711467583974205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,7168,0.018888533115386963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,6144,0.018541866540908815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,3584,0.01671573321024577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,5120,0.01765120029449463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,4096,0.017194666465123496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,2560,0.016005333264668783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,3072,0.016509866714477538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,2048,0.015822933117548624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,1536,0.015307733416557312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,768,0.01474666694800059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,1024,0.014818132917086283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,256,0.01444906691710154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,512,0.014634666840235391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,128,0.0143477330605189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,64,0.014429866274197897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,64,32,0.014498133460680643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,16384,0.01890346606572469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,10240,0.018908800681432088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,65536,0.02185279925664266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,12288,0.018206934134165444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,7168,0.018964266777038573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,8192,0.018388267358144125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,4096,0.016883200407028197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,6144,0.018683733542760213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,3584,0.01684266726175944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,5120,0.017806933323542277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,3072,0.016458666324615477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,2048,0.01576746702194214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,2560,0.015774933497111003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,512,0.014681599537531533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,1536,0.015309866269429526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,1024,0.014898133277893067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,768,0.014871467153231302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,256,0.014525866508483887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,128,0.014266666769981385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,64,0.014354133605957031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,16,32,32,0.01397546629110972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,4096,0.08057066599527994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,5120,0.09685440063476562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,6144,0.11367039680480957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,7168,0.12861653168996173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,8192,0.14577919642130535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,10240,0.1786069393157959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,3584,0.07188159624735514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,3072,0.0642741322517395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,2560,0.055846401055653895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,2048,0.0484991987546285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,1024,0.03413120110829671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,1536,0.04004480044047038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,768,0.02789120078086853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,64,0.0162090669075648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,512,0.023176532983779908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,256,0.01935360034306844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,12288,0.2099455992380778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,128,0.01604586640993754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,32,0.016453333695729575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,6144,0.038771200180053714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,65536,16384,0.2787242571512858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,7168,0.042632532119750974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,8192,0.050570666790008545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,10240,0.05403733253479004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,12288,0.061806933085123694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,16384,0.07707839806874593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,4096,0.03320320049921672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,3584,0.029183999697367353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,3072,0.027155200640360515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,1536,0.020807466904322305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,5120,0.03504000107447307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,2560,0.02493013342221578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,2048,0.02290133237838745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,1024,0.01882879932721456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,256,0.015158399939537048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,768,0.017897599935531618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,512,0.015372799833615622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,128,0.014910933375358582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,64,0.014990933736165366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,32,0.01497066617012024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,6144,0.034279465675354004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,8192,0.04001599947611491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,7168,0.03744000196456909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,10240,0.045456000169118244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,12288,0.05353920062383016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,16384,0.06588053305943807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,2048,0.021271467208862305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,2560,0.022940800587336222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,4096,0.027934932708740236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,5120,0.031229867537816362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,3584,0.026268800099690754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,3072,0.024264534314473472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,1536,0.01946880022684733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,1024,0.01813760002454122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,64,0.014762666821479798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,768,0.017052799463272095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,512,0.01542080044746399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,256,0.015004799763361613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,128,0.014787200093269347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,32,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,8192,0.03572800159454346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,10240,0.04098879893620809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,7168,0.033360000451405844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,12288,0.04952106475830078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,16384,0.059680000940958655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,16384,65536,0.28745174407958984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,6144,0.03065813382466634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,4096,0.025115732351938886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,3584,0.023731199900309245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,3072,0.02264639933904012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,5120,0.028076799710591634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,2560,0.021988266706466676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,12288,65536,0.22475199699401854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,2048,0.020041600863138834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,1536,0.018902399142583213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,1024,0.017505067586898803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,768,0.01529813309510549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,512,0.015214932958285013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,128,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,256,0.01469546655813853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,64,0.014735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,32,0.014781866470972696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,10240,0.035240534941355386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,8192,0.03113493323326111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,12288,0.0385696013768514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,7168,0.029292800029118854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,6144,0.02727893392244975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,16384,0.04661973317464192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,5120,0.025257599353790284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,10240,65536,0.18952320416768392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,3584,0.02188053329785665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,3072,0.02097813288370768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,2560,0.02008533279101054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,4096,0.022632533311843873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,1536,0.018052266041437785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,2048,0.019082667430241902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,1024,0.016004266341527303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,64,0.014779733618100485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,768,0.015331199765205384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,256,0.015051733454068503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,512,0.015044266978899637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,128,0.014478933811187745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,32,0.014751999576886495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,7168,0.029922133684158324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,8192,0.03204266627629598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,6144,0.02845653295516968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,10240,0.03585066795349121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,16384,0.0477514664332072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,12288,0.03949973185857137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,2560,0.01971093416213989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,4096,0.021950932343800862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,3584,0.021341866254806517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,3072,0.020567466815312706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,8192,65536,0.15402560234069823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,2048,0.018501333395640054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,5120,0.023590399821599325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,1536,0.01767573356628418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,32,0.01467626690864563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,1024,0.015556266903877259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,768,0.01534933348496755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,512,0.015273599823315939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,256,0.014854400356610616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,128,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,64,0.01447466711203257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,6144,0.024261333545049033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,7168,0.025990400711695356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,8192,0.02805440028508504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,10240,0.030843732754389445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,12288,0.03410880168279012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,7168,65536,0.1344821294148763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,16384,0.03905813296635945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,5120,0.02260800004005432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,2048,0.018860799074172974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,4096,0.021418666839599608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,3584,0.0206986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,3072,0.019964800278345744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,2560,0.019218132893244425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,1536,0.01717546582221985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,768,0.015235199530919393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,1024,0.01502293348312378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,512,0.015145599842071533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,256,0.014788267016410828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,128,0.014780799547831217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,64,0.014661332964897156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,32,0.01469013293584188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,6144,0.02258560061454773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,7168,0.023990400632222495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,8192,0.0254314661026001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,6144,65536,0.12147626876831055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,10240,0.028061866760253906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,16384,0.03841813405354817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,12288,0.033174399534861246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,5120,0.02143893241882324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,3072,0.019241599241892497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,4096,0.02023893396059672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,3584,0.019742933909098308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,2048,0.017486933867136636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,2560,0.0184608002503713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,1536,0.01541759967803955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,768,0.015105066696802774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,1024,0.015275733669598899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,512,0.015030399958292643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,256,0.014868266383806863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,128,0.01452906628449758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,64,0.014596266547838846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,32,0.014617600043614707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,5120,65536,0.10499413013458252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,7168,0.024655999739964803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,6144,0.023516800006230673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,5120,0.02034026583035787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,16384,0.0339573343594869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,8192,0.02553173303604126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,10240,0.027811199426651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,12288,0.029946666955947877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,3072,0.018273067474365235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,4096,0.019250132640202842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,3584,0.0187008003393809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,2560,0.017658666769663493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,2048,0.01641386648019155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,1536,0.015592533349990844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,1024,0.015441067020098367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,768,0.015032533804575601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,512,0.014886400103569031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,256,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,65536,0.08672853310902914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,128,0.014686933159828186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,64,0.014342400431632995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,4096,32,0.014754133423169455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,8192,0.025276799996693928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,16384,0.032253867387771605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,7168,0.023785599072774253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,6144,0.023004800081253052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,5120,0.019939200083414713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,4096,0.01877760092417399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,10240,0.026264532407124834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,12288,0.028334933519363403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,3584,0.018477867046991982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,1536,0.01573973298072815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,3072,0.01775253415107727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,2560,0.016152532895406087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,2048,0.01591253379980723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,1024,0.015278933445612588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,768,0.015058133006095886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,512,0.014847999811172486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,256,0.014626133441925048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,128,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,64,0.014255999525388082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,32,0.014725333452224732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3584,65536,0.0772490660349528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,10240,0.025512532393137617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,12288,0.02707200050354004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,4096,0.018618667125701906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,16384,0.030430932839711506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,8192,0.023486934105555215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,6144,0.022386133670806885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,7168,0.022894932826360067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,5120,0.019612799088160195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,3072,0.017486933867136636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,3584,0.018359466393788656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,2048,0.015532799561818442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,2560,0.016295466820398966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,1536,0.015481600165367126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,1024,0.015072000026702882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,768,0.015109333395957946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,512,0.014830933014551798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,256,0.01474240024884542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,65536,0.06715733210245768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,128,0.014643200238545737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,64,0.014595199624697366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,3072,32,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,12288,0.025060266256332397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,10240,0.024155733982721965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,16384,0.028088533878326417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,7168,0.022009599208831786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,6144,0.02138239940007528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,8192,0.02258239984512329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,5120,0.01920426686604818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,4096,0.01839253306388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,3584,0.017718400557835898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,3072,0.01651946703592936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,2560,0.016229333480199178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,512,0.014850133657455444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,2048,0.01574399967988332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,768,0.015060266852378846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,65536,0.06163413524627685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,1024,0.015409066279729208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,1536,0.015666133165359496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,256,0.014812800288200378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,64,0.014548266927401224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,128,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2560,32,0.014605866869290671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,12288,0.023520000775655112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,7168,0.021127466360727945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,10240,0.022997333606084188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,16384,0.02558186650276184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,8192,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,6144,0.020602667331695558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,5120,0.018967467546463012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,3584,0.016670932372411094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,4096,0.017173333962758382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,3072,0.016593066851298015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,2560,0.01618346671263377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,65536,0.0511680006980896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,2048,0.015897599856058757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,768,0.01497066617012024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,1536,0.015459199746449789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,1024,0.015212800105412802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,512,0.014897066354751586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,64,0.014481066664059957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,256,0.014476799964904785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,128,0.014616533120473226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,2048,32,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,16384,0.023363200823465984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,12288,0.022338134050369263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,8192,0.020844799280166627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,10240,0.021652267376581828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,7168,0.020631466309229532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,6144,0.0196288009484609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,5120,0.017683200041453042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,65536,0.04385173320770264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,3072,0.016339199741681416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,4096,0.01760853330294291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,3584,0.01699626644452413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,2560,0.016309332847595216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,2048,0.01581546664237976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,512,0.014748799800872802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,1536,0.01556373337904612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,1024,0.015288533767064414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,768,0.014942933122316995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,256,0.014574933052062988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,128,0.014599466323852539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,64,0.014710399508476257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1536,32,0.01455573340257009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,10240,0.020566399892171225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,16384,0.022089600563049316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,12288,0.02066453297932943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,8192,0.01853013237317403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,7168,0.019381332397460937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,65536,0.035454932848612467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,4096,0.01734506686528524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,6144,0.019003732999165853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,5120,0.017886932690938315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,3584,0.016643200318018594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,3072,0.016536532839139303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,2560,0.016006400187810264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,1024,0.015253333250681558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,2048,0.015778133273124696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,1536,0.015629866719245912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,768,0.015018666783968607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,512,0.014747732877731323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,32,0.014625066518783569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,256,0.0144896000623703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,128,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,1024,64,0.014533332983652749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,16384,0.02095466653505961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,12288,0.01898026665051778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,10240,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,8192,0.01871466636657715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,65536,0.03185813426971436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,7168,0.018757333358128868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,6144,0.018691200017929076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,5120,0.019427200158437095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,4096,0.018550399939219156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,2560,0.01694719990094503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,3584,0.018237866957982383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,3072,0.01790293256441752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,2048,0.016390400131543477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,1536,0.015988266468048094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,512,0.014904533823331198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,1024,0.015655466914176942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,768,0.0151637335618337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,256,0.01483519971370697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,128,0.014732799927393594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,64,0.01451520025730133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,768,32,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,16384,0.01925546725591024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,8192,0.018500266472498576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,12288,0.018266665935516357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,10240,0.018897066513697304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,65536,0.026804266373316447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,7168,0.019282132387161255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,6144,0.018772266308466592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,4096,0.017349332571029663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,3584,0.017003732919692992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,5120,0.01793280045191447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,3072,0.01662613352139791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,2560,0.016198399662971496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,2048,0.01574399967988332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,1536,0.015570132931073507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,512,0.014856533209482829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,768,0.01508799990018209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,1024,0.015320533514022827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,256,0.014668800433476768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,64,0.014597333470980325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,128,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,16384,0.018971733252207437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,512,32,0.014316800236701965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,12288,0.018872533241907755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,65536,0.02391786575317383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,10240,0.01917333404223124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,8192,0.01845226685206095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,7168,0.01890346606572469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,5120,0.017838933070500693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,3584,0.017116800944010416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,6144,0.01884053349494934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,4096,0.017119999726613364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,3072,0.01660693287849426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,2560,0.01623466710249583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,2048,0.015738667050997416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,1024,0.015176533659299215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,1536,0.015427199999491372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,768,0.014919466773668923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,512,0.014857600132624308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,256,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,128,0.014740266402562461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,32,0.014497066537539164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,256,64,0.014490666985511779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,65536,0.021564799547195434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,12288,0.018499199549357095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,16384,0.018686934312184652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,10240,0.018809600671132406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,8192,0.018288000424702962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,7168,0.0188426673412323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,3584,0.017051732540130614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,4096,0.017262933651606242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,6144,0.01842026710510254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,5120,0.017838933070500693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,3072,0.016535466909408568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,1024,0.015178666512171427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,2560,0.016153599818547568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,2048,0.015718400478363037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,1536,0.01553600033124288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,768,0.01492586632569631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,512,0.014739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,256,0.014425599575042724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,128,0.014233600099881491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,64,0.014412800470987955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,128,32,0.01451520025730133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,12288,0.018362667163213095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,65536,0.019966934124628702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,16384,0.01872640053431193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,10240,0.018501333395640054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,6144,0.018616533279418944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,7168,0.018565332889556883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,8192,0.0180074671904246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,4096,0.016965333620707193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,5120,0.017543466885884602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,3584,0.01638826628526052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,2048,0.01580586632092794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,3072,0.016340266664822897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,2560,0.015897599856058757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,1536,0.015093333522478738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,1024,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,768,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,128,0.014526933431625366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,512,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,256,0.014248533050219217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,64,0.014497066537539164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,64,32,0.014380799730618796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,10240,0.018862932920455933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,12288,0.018332799275716148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,65536,0.020598399639129638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,16384,0.018645334243774413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,8192,0.018055466810862224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,7168,0.01830079952875773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,4096,0.017292799552281697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,6144,0.018410666783650716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,5120,0.017644800742467246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,3584,0.016696532567342125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,3072,0.0164192001024882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,2560,0.015892266233762106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,2048,0.015526400009791056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,1024,0.014892799655596414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,1536,0.015093333522478738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,256,0.01430293321609497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,768,0.014843733112017313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,512,0.014532267053922018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,128,0.014247467120488485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,64,0.01423466702302297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,8,32,32,0.014341333508491516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,4096,0.08035946687062581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,5120,0.09632319609324137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,6144,0.11375466982523601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,7168,0.1280117352803548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,8192,0.14651519457499187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,10240,0.17814292907714843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,3584,0.07157973448435465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,3072,0.06394559939702352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,2560,0.05585813522338867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,2048,0.04876373211542766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,1024,0.03372480074564616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,1536,0.03978133201599121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,768,0.027799467245737713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,64,0.016195199886957803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,256,0.019383466243743895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,128,0.015753600001335143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,512,0.023205333948135377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,12288,0.20897812843322755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,32,0.01641386648019155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,6144,0.03880853255589803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,8192,0.04662079811096191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,7168,0.04593066771825154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,65536,16384,0.27776107788085935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,10240,0.053761065006256104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,12288,0.061427199840545656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,16384,0.07681386470794678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,1536,0.020834134022394816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,3072,0.028570665915807085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,4096,0.030923734108606975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,3584,0.028981333971023558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,5120,0.03482026656468709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,2560,0.024948267141977946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,2048,0.02272640069325765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,1024,0.018759467204411826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,768,0.017997866868972777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,512,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,256,0.015068800250689188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,128,0.015050666530927024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,64,0.014904533823331198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,32,0.014813866217931113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,6144,0.033240532875061034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,7168,0.03751360177993775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,8192,0.04047893285751343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,10240,0.04726933240890503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,12288,0.053766401608785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,16384,0.06607573429743449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,2048,0.021287467082341513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,2560,0.02265066703160604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,4096,0.027907200654347736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,5120,0.03025706609090169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,3584,0.026207999388376875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,3072,0.024522666136423746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,1536,0.019488000869750978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,64,0.014789332946141561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,1024,0.017812265952428182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,512,0.014853333433469137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,768,0.01663146714369456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,256,0.015217066804567973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,128,0.01483626663684845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,32,0.014817066987355552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,7168,0.03424213329950969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,8192,0.035507198174794516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,10240,0.040752001603444415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,12288,0.045604264736175536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,16384,0.05904746850331625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,16384,65536,0.27197653452555337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,6144,0.031396265824635824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,4096,0.024934399127960204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,5120,0.02776319980621338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,3072,0.02249493400255839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,3584,0.023752532402674355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,2048,0.020130133628845213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,2560,0.022040534019470214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,256,0.015131733814875283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,12288,65536,0.2204970677693685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,768,0.015291733543078103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,1536,0.01893226703008016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,1024,0.017220266660054526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,512,0.015161599715550741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,128,0.014888532956441245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,64,0.014702933033307395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,32,0.014665599664052328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,8192,0.03145066698392232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,6144,0.027074132363001508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,7168,0.029277867078781127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,10240,0.0348960002263387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,12288,0.03834986686706543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,3584,0.022024534145991006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,16384,0.04661120176315307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,4096,0.02295359969139099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,3072,0.02099306583404541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,2560,0.019718400637308755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,5120,0.02525866627693176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,2048,0.01897066632906596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,1024,0.015862400333086647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,1536,0.018157867590586345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,768,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,10240,65536,0.17868800163269044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,512,0.015210666259129844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,256,0.015066666404406228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,128,0.01477120021979014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,64,0.014722133676211039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,32,0.014814933141072592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,12288,0.0387658675511678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,16384,0.04835413297017415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,8192,65536,0.1488906701405843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,3584,0.022336000204086305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,7168,0.030128000179926555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,4096,0.022090667486190797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,8192,0.03234560092290242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,5120,0.023692800601323446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,10240,0.03552746772766113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,6144,0.02847040096918742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,2560,0.0195743997891744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,1536,0.017576533555984496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,3072,0.02039360006650289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,2048,0.018540799617767334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,1024,0.015109333395957946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,768,0.015421866377194723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,512,0.015318399667739869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,256,0.01490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,65536,0.1328095992406209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,128,0.014738133549690247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,64,0.014679466684659323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,7168,32,0.014626133441925048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,7168,0.025949867566426595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,10240,0.031624533732732135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,5120,0.023061333100001018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,8192,0.029303467273712157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,6144,0.02378666599591573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,12288,0.03495253324508667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,16384,0.04085973501205444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,4096,0.021572266022364298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,3584,0.020803199211756388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,3072,0.019860267639160156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,2560,0.019078399737675986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,2048,0.018027732769648232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,1536,0.016636799772580466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,1024,0.015301332871119181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,768,0.015166933337847391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,512,0.015101866920789084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,256,0.014945066968599954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,128,0.014629333217938741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,64,0.014601600170135499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,32,0.01474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,12288,0.03309333324432373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,16384,0.03811093171437581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,6144,0.02268480062484741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,5120,0.021473066012064616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,10240,0.02791786591211955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,7168,0.02404906749725342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,8192,0.025872000058492023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,6144,65536,0.11614720026652019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,4096,0.02036693294843038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,3072,0.018979199727376304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,3584,0.019504000743230186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,2560,0.01819093426068624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,2048,0.017454934120178223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,1536,0.01591253379980723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,1024,0.015243732929229736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,512,0.015046399831771851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,768,0.015361066659291586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,256,0.014905599753061929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,128,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,64,0.014542933305104574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,32,0.014659200112024942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,5120,65536,0.10380053520202637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,12288,0.0295360008875529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,16384,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,7168,0.02446399927139282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,6144,0.02370880047480265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,10240,0.027662932872772217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,8192,0.025355732440948485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,4096,0.019483733177185058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,3584,0.018710400660832724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,5120,0.020150399208068846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,3072,0.018158932526906334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,2560,0.017607466379801432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,2048,0.016793600718180337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,768,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,1024,0.014997333288192749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,1536,0.014966400464375815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,512,0.015033599734306336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,32,0.014621866742769876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,256,0.014740266402562461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,128,0.014622933665911355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,64,0.01476693352063497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,4096,65536,0.08367466926574707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,10240,0.0261354664961497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,12288,0.02810879945755005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,8192,0.02461013396581014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,16384,0.03170880079269409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,7168,0.023814400037129722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,6144,0.022989867130915324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,5120,0.019849600394566853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,4096,0.018786134322484334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,768,0.015068800250689188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,3584,0.018388267358144125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,3072,0.017937066157658894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,2560,0.017463467518488564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,1024,0.014990933736165366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,1536,0.015588266650835672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,2048,0.01597760021686554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,512,0.015003732840220132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,65536,0.07701760133107503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,256,0.014640000462532044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,32,0.014418133099873862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,128,0.01460693379243215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3584,64,0.014567466576894126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,10240,0.02568320035934448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,12288,0.0272490660349528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,16384,0.030475733677546184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,7168,0.022823466857274374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,8192,0.024165334304173787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,6144,0.021926399072011313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,5120,0.019324799378712974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,4096,0.018717867136001588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,3584,0.018285866578420004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,2560,0.0160970667997996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,3072,0.017220266660054526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,2048,0.01616426706314087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,1536,0.015447466572125753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,512,0.01493119994799296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,768,0.015267200271288552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,1024,0.01523413360118866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,65536,0.06601599852244058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,256,0.014629333217938741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,128,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,64,0.014716800053914389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,3072,32,0.014612266421318054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,16384,0.02784213423728943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,12288,0.025254400571187337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,8192,0.02254400054613749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,10240,0.024088533719380696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,7168,0.02191466689109802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,6144,0.021271467208862305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,5120,0.019078399737675986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,4096,0.018270933628082277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,3584,0.017837866147359212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,3072,0.016249600052833556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,768,0.015267200271288552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,2560,0.016285866498947144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,2048,0.015958399573961893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,1536,0.01564586659272512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,1024,0.015127467115720114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,65536,0.06064853270848593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,128,0.01453013320763906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,256,0.014615466197331747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,512,0.01504853367805481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,64,0.014681599537531533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2560,32,0.014568533500035605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,12288,0.023257599274317423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,10240,0.02292799949645996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,8192,0.02160960038503011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,7168,0.021061333020528157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,16384,0.02516266703605652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,3584,0.017147733767827352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,6144,0.020514132579167683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,4096,0.017553067207336424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,5120,0.01884053349494934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,3072,0.016311466693878174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,1536,0.015399466951688132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,2560,0.016323199868202208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,2048,0.015726932883262636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,65536,0.052118400732676186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,768,0.014852266510327658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,1024,0.015219199657440185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,512,0.014908799529075622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,256,0.01471573313077291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,128,0.014472533265749613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,16384,0.023753599325815836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,64,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,2048,32,0.014436266819636025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,8192,0.020641066630681357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,12288,0.022027732928593953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,10240,0.021316266059875487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,7168,0.020669867595036827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,6144,0.019705599546432494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,5120,0.019672532876332603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,4096,0.01802133321762085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,3584,0.017903999487559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,2048,0.01696853240331014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,65536,0.04292693138122559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,1536,0.015476266543070475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,3072,0.01824000080426534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,2560,0.017388800779978432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,1024,0.015170133113861084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,512,0.014777599771817525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,768,0.014965333541234336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,64,0.014593066771825156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,256,0.01474240024884542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,128,0.014563199877738953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1536,32,0.014663466811180114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,8192,0.01861013372739156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,12288,0.0200000007947286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,16384,0.021950932343800862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,7168,0.018862932920455933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,10240,0.020131200551986694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,65536,0.03388479948043823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,6144,0.018179200092951455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,5120,0.021381332476933797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,4096,0.02023893396059672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,3584,0.02095466653505961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,2560,0.02057066758473714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,3072,0.022170666853586832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,2048,0.015836800138155617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,512,0.01486186683177948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,1536,0.015364266435305276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,1024,0.015159466862678527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,768,0.015121066570281982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,128,0.014526933431625366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,256,0.014520532886187234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,64,0.01458453337351481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,8192,0.018355200688044228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,1024,32,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,16384,0.020814933379491172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,12288,0.01921280026435852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,7168,0.0187285323937734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,10240,0.01930026610692342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,5120,0.019428267081578573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,6144,0.018943999210993448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,65536,0.030849067370096843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,4096,0.018127999703089395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,1536,0.016131200393040977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,2560,0.017157334089279174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,3584,0.018362667163213095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,3072,0.01802560091018677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,2048,0.016696532567342125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,768,0.015367466211318969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,1024,0.015617066621780395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,512,0.01504853367805481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,256,0.014934399724006652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,128,0.014325333635012307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,64,0.014640000462532044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,768,32,0.014748799800872802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,8192,0.018437333901723228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,16384,0.019115734100341796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,12288,0.01876586675643921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,10240,0.018598399559656777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,65536,0.02641493280728658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,7168,0.01875200072924296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,4096,0.01735146641731262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,6144,0.018547199169794717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,3584,0.017052799463272095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,5120,0.017925333976745606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,3072,0.016808533668518068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,2560,0.01619733373324076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,2048,0.01606613298257192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,1024,0.015057067076365152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,1536,0.01541866660118103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,512,0.014939733346303306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,768,0.015006933609644571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,256,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,128,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,64,0.014572800199190775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,65536,0.023238400618235268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,16384,0.018600533405939736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,512,32,0.014697600404421488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,12288,0.018654932578404747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,10240,0.020013866821924846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,8192,0.019350399573644005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,5120,0.017879466215769448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,7168,0.019050665696461997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,4096,0.017193599541982015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,3072,0.016379732886950174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,6144,0.0184063990910848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,3584,0.01697493394215902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,2560,0.01607360045115153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,1536,0.015435733397801719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,2048,0.015983999768892924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,512,0.01480959951877594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,768,0.014990933736165366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,1024,0.015066666404406228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,128,0.014619732896486918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,256,0.014482133587201438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,32,0.014566399653752646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,16384,0.019492266575495402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,256,64,0.014512000481287637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,12288,0.018539732694625853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,65536,0.0207370658715566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,10240,0.018732800086339315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,5120,0.017796266078948974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,8192,0.018475733200709023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,6144,0.018525866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,7168,0.018770132462183634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,3584,0.01665493349234263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,4096,0.01708266735076904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,3072,0.016461867094039916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,2048,0.01585706671079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,2560,0.01609493295351664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,1024,0.01499626636505127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,1536,0.01551040013631185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,512,0.014620799819628397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,768,0.014846932888031007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,64,0.014386133352915446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,256,0.014577066898345948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,128,0.014439466595649719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,128,32,0.014388266205787658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,65536,0.01939093271891276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,8192,0.018295466899871826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,12288,0.018323200941085815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,16384,0.018595200777053834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,10240,0.018630399306615194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,7168,0.01882986625035604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,6144,0.018314667542775474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,5120,0.017706666390101114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,3584,0.016839466492335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,4096,0.017349332571029663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,3072,0.016356266538302102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,2560,0.016198399662971496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,2048,0.01573013365268707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,512,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,1536,0.015280000368754067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,1024,0.01493119994799296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,768,0.014568533500035605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,256,0.014578133821487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,128,0.013884799679120383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,64,0.014428800344467163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,65536,0.02034026583035787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,64,32,0.014300800363222756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,8192,0.018116267522176106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,16384,0.018484266599019368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,12288,0.018362667163213095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,10240,0.01848213275273641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,7168,0.018480000893274943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,5120,0.01770240068435669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,3584,0.016979199647903443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,6144,0.01830186645189921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,4096,0.01694399913152059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,3072,0.016251732905705772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,1536,0.01516266663869222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,2560,0.016090666254361473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,2048,0.015809067090352378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,1024,0.014968533317248026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,768,0.015122133493423461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,512,0.014703999956448874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,256,0.014301866292953491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,64,0.013894400000572205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,128,0.014246400197347006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,4,32,32,0.014340266585350037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,4096,0.08006292978922526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,5120,0.09622186819712321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,6144,0.1128597338994344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,7168,0.12792106469472247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,8192,0.14591253598531087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,10240,0.1789888064066569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,3584,0.07124266624450684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,2560,0.05524906714757284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,3072,0.06357333262761435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,12288,0.2089087963104248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,2048,0.04816853205362956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,1536,0.03973013162612915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,1024,0.0337557315826416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,768,0.02762453357378642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,256,0.0193066676457723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,512,0.022983467578887938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,128,0.01685546636581421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,64,0.01616426706314087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,32,0.016402133305867515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,8192,0.046029865741729736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,6144,0.038482133547465006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,7168,0.04209599892298381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,10240,0.053756801287333164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,65536,16384,0.2743797302246094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,12288,0.061210668087005614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,5120,0.034740265210469565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,16384,0.07651093006134033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,1536,0.020806399981180827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,3072,0.027026132742563887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,4096,0.030770132939020794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,3584,0.02860586643218994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,2560,0.02613439957300822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,2048,0.022747733195622764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,1024,0.018810667594273887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,768,0.018052266041437785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,512,0.015961600343386333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,256,0.01532906691233317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,128,0.01474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,64,0.01490133305390676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,32,0.014906666676203408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,7168,0.03741439978281657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,6144,0.03415573438008626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,8192,0.03910719950993856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,10240,0.04724586804707845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,12288,0.052909866968790686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,16384,0.0652181347211202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,2048,0.021227733294169108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,2560,0.022667733828226726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,5120,0.031241599718729657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,4096,0.02797866662343343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,3584,0.025811199347178144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,3072,0.02433919906616211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,1536,0.019592533508936562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,1024,0.017867734034856163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,64,0.014779733618100485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,512,0.015262933572133383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,768,0.016500266393025716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,256,0.014978133638699851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,128,0.014941866199175516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,32,0.014506666858990987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,8192,0.03557866811752319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,16384,65536,0.27265707651774085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,7168,0.03261866569519043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,10240,0.04028693437576294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,12288,0.04516160090764364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,16384,0.05859839916229248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,5120,0.029738666613896687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,3072,0.02234026590983073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,4096,0.024859732389450072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,6144,0.030514132976531983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,3584,0.02354453404744466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,2560,0.021191465854644775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,2048,0.0201749324798584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,1536,0.01866133411725362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,1024,0.01709653337796529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,128,0.014734933773676554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,768,0.015196800231933594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,12288,65536,0.21845547358194986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,512,0.015012266238530478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,256,0.014978133638699851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,32,0.014572800199190775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,64,0.014711466431617738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,8192,0.03143360018730164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,10240,0.03483413457870484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,12288,0.04162666797637939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,7168,0.02876373330752055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,6144,0.026858667532602947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,16384,0.04644906520843506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,3072,0.021013333400090536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,3584,0.022006400426228843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,2560,0.01989226738611857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,10240,65536,0.17786026000976562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,2048,0.01904319922129313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,5120,0.025277866919835405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,4096,0.022924800713857017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,512,0.0147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,1536,0.017987199624379478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,768,0.01574186682701111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,1024,0.016225066781044007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,256,0.015063466628392539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,128,0.014679466684659323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,64,0.014625066518783569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,32,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,6144,0.028178133567174274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,7168,0.029959466060002642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,8192,0.032008532683054605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,10240,0.035537068049112955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,12288,0.03897173404693603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,16384,0.04645013411839803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,8192,65536,0.15294933319091797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,5120,0.0241482675075531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,4096,0.022299732764561972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,3584,0.02145706613858541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,3072,0.020376533269882202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,2560,0.01955839991569519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,1024,0.015511467059453329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,2048,0.01848640044530233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,1536,0.01832533280054728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,768,0.015238400300343832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,64,0.014669866363207499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,512,0.015201066931088766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,256,0.015013333161671957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,128,0.01467519998550415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,32,0.014755200346310934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,12288,0.03529813289642334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,7168,0.026949334144592284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,8192,0.028681600093841554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,10240,0.03202133377393086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,6144,0.02523733377456665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,16384,0.04091093142827352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,5120,0.02339413364728292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,7168,65536,0.1308128039042155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,4096,0.021708800395329794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,1536,0.016313599546750386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,3584,0.020883200565973918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,2560,0.019156267245610557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,3072,0.020001065731048585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,2048,0.017917867501576742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,1024,0.014924800395965577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,512,0.015046399831771851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,768,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,128,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,256,0.014940800269444785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,64,0.014658133188883463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,32,0.014653866489728292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,16384,0.03773760000864665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,6144,0.022667733828226726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,6144,65536,0.11863040129343669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,7168,0.02406719923019409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,8192,0.025554132461547852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,10240,0.028060799837112425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,12288,0.033113600810368855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,5120,0.021333332856496176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,4096,0.020205867290496827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,3584,0.019924267133076986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,1024,0.015440000096956888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,3072,0.019014400243759156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,2048,0.017388800779978432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,2560,0.01842026710510254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,1536,0.0157258669535319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,256,0.014749866724014283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,768,0.015180800358454385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,512,0.015238400300343832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,128,0.014663466811180114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,64,0.01458453337351481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,32,0.014793599645296732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,5120,65536,0.10313920180002849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,10240,0.02736213405927022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,16384,0.03335893154144287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,6144,0.023448532819747923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,7168,0.024333866437276204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,12288,0.02954453428586324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,8192,0.025439999500910443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,5120,0.020038400093714395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,4096,0.019118932882944743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,3584,0.018636800845464072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,3072,0.018149334192276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,2560,0.017474132776260375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,2048,0.01618133286635081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,1536,0.015198933084805808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,512,0.01495039959748586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,768,0.01525759994983673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,1024,0.015092266599337259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,256,0.014916266997655234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,128,0.014573867122332254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,64,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,65536,0.08364906311035156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,4096,32,0.014633599917093912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,16384,0.03186239997545878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,12288,0.027618134021759035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,8192,0.024077866474787393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,7168,0.023960532744725545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,6144,0.02286613384882609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,5120,0.019768534104029338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,4096,0.01880106727282206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,10240,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,3584,0.01819733381271362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,1024,0.015449600418408713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,3072,0.01770026683807373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,2560,0.017086933056513466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,2048,0.01590506633122762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,1536,0.01546346644560496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,768,0.015270400047302245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,512,0.014679466684659323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,256,0.014737066626548768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,65536,0.07433066368103028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,128,0.014646400014559427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,64,0.014568533500035605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3584,32,0.014552533626556396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,10240,0.025040000677108765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,8192,0.023850667476654052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,12288,0.02732586661974589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,16384,0.03072426716486613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,6144,0.02172693411509196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,7168,0.023167999585469563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,5120,0.019476266702016194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,4096,0.018497065703074137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,3584,0.018103466431299845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,3072,0.017083734273910522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,2048,0.015619200468063355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,2560,0.01634880006313324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,512,0.014935466647148132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,768,0.015237333377202353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,1536,0.015637333194414772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,1024,0.015383467078208923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,256,0.014413866400718688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,65536,0.06506773233413696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,32,0.01452906628449758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,128,0.014523733655611673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,3072,64,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,7168,0.021951999266942343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,12288,0.025102933247884113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,10240,0.02386666735013326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,8192,0.022805333137512207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,16384,0.028016000986099243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,5120,0.019105066855748497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,4096,0.0184661328792572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,6144,0.021233065923055013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,3584,0.017698132991790773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,3072,0.016280532876650492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,65536,0.060193065802256265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,2560,0.0160970667997996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,128,0.014487466216087342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,1024,0.015330132842063905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,1536,0.015706666310628257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,2048,0.015828266739845276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,512,0.015051733454068503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,768,0.015075199802716575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,256,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,32,0.01455466647942861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2560,64,0.014546133081118264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,6144,0.020594133933385213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,10240,0.022929066419601442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,12288,0.022985599438349404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,7168,0.020776534080505372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,8192,0.021597866217295328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,16384,0.025214932362238568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,4096,0.017293866475423178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,5120,0.018863999843597413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,3584,0.01648853321870168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,2560,0.01583253343900045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,3072,0.015826132893562318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,1536,0.015655466914176942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,1024,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,2048,0.015689599514007568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,65536,0.0511626680692037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,768,0.015069866180419922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,32,0.014519466956456503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,256,0.014419200023015341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,512,0.01506239970525106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,128,0.014600533246994018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,2048,64,0.014731733004252115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,16384,0.0236735999584198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,10240,0.021206400791803994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,12288,0.022146133581797282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,8192,0.02041813333829244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,7168,0.020530132452646892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,6144,0.0194815993309021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,5120,0.020461867252985634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,4096,0.019246933857599895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,3584,0.018926932414372762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,65536,0.04229226509730021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,3072,0.01807253360748291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,2560,0.017339734236399333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,2048,0.01709866722424825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,512,0.014964266618092855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,768,0.015080533425013223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,1536,0.015465600291887918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,1024,0.015361066659291586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,256,0.014577066898345948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,64,0.014275200168291726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,128,0.014574933052062988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,16384,0.02216213345527649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1536,32,0.014646400014559427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,12288,0.02004800041516622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,10240,0.02022613286972046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,5120,0.023982934157053628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,7168,0.018385066588719686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,6144,0.018422400951385497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,8192,0.018722132841746012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,65536,0.03350186745325724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,4096,0.023689599831899007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,2560,0.020677334070205687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,3584,0.022617600361506143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,3072,0.02168853282928467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,2048,0.015624533096949259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,256,0.014732799927393594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,768,0.01497066617012024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,1536,0.015532799561818442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,1024,0.015212800105412802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,512,0.014994133512179056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,64,0.014711466431617738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,128,0.014582399527231851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,1024,32,0.014203733205795288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,16384,0.020773333311080933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,12288,0.01949653426806132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,10240,0.018480000893274943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,8192,0.0184714674949646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,65536,0.03069973389307658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,4096,0.01918826699256897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,7168,0.01900586684544881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,5120,0.020138667027155558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,6144,0.01834453344345093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,3584,0.018345600366592406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,1536,0.016241066654523215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,3072,0.017485866943995156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,2560,0.017364267508188883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,2048,0.016821332772572837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,768,0.01535040040810903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,1024,0.015705600380897522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,512,0.015169066190719605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,128,0.014654933412869772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,256,0.014820266763369241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,64,0.01451520025730133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,768,32,0.014673067132631936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,16384,0.01875093380610148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,8192,0.018544000387191773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,65536,0.026177066564559936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,12288,0.01849493384361267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,10240,0.01806933283805847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,7168,0.018953599532445273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,6144,0.018248534202575682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,5120,0.01791680057843526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,3584,0.017042134205500284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,4096,0.017417599757512413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,3072,0.016591999928156534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,2560,0.016376533110936484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,2048,0.016013866662979125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,512,0.01504746675491333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,128,0.014548266927401224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,1536,0.015459199746449789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,1024,0.015135999520619711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,768,0.015169066190719605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,256,0.014470400412877402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,64,0.014731733004252115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,16384,0.018668800592422485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,512,32,0.014545067151387533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,65536,0.022986666361490885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,12288,0.018407466014226277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,10240,0.019552000363667808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,7168,0.01874133348464966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,8192,0.019296000401178993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,5120,0.017826133966445924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,6144,0.01844053268432617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,3584,0.016871466239293417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,4096,0.017189333836237587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,3072,0.016321067015329996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,1024,0.015117866794268289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,2048,0.015593600273132325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,2560,0.016099199652671814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,1536,0.015451733271280924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,512,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,768,0.015077333648999533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,64,0.0145087997118632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,256,0.01455573340257009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,32,0.014195199807484946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,256,128,0.014481066664059957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,65536,0.01975040038426717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,16384,0.019499733050664266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,8192,0.018026665846506754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,12288,0.018487467368443807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,10240,0.018739199638366698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,7168,0.01882986625035604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,6144,0.01821119983990987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,5120,0.017731199661890663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,3072,0.01636693378289541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,2560,0.01606826682885488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,4096,0.017102932929992674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,3584,0.016820265849431356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,2048,0.01578879952430725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,1024,0.014947199821472168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,512,0.014807466665903726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,1536,0.0153546671072642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,768,0.014974932869275412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,256,0.01461120049158732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,128,0.014285866419474283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,64,0.01439573367436727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,16384,0.018564265966415406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,128,32,0.01448853313922882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,65536,0.018900267283121743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,8192,0.018372267484664917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,12288,0.01818986733754476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,10240,0.01867626706759135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,7168,0.01878933310508728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,5120,0.017930666605631508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,6144,0.018286933501561485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,3584,0.016616533199946083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,4096,0.017222400506337485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,3072,0.016370133558909098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,2048,0.01565226713816325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,2560,0.015961600343386333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,1024,0.01480959951877594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,1536,0.015228799978892007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,128,0.014345600207646688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,512,0.01427733302116394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,768,0.01474666694800059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,256,0.01448853313922882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,64,0.014246400197347006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,65536,0.020076799392700195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,12288,0.018151466051737467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,64,32,0.014276267091433207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,10240,0.018461867173512777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,16384,0.018651733795801796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,8192,0.01800533334414164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,7168,0.01859626571337382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,4096,0.01699840029080709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,5120,0.01766293247540792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,6144,0.018194133043289186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,3072,0.01634239951769511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,3584,0.016662399967511496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,2560,0.015973333517710367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,1024,0.015284267067909241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,768,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,1536,0.015324800213177999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,2048,0.015666133165359496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,256,0.014206932981808982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,512,0.014578133821487427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,32,0.014338133732477823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,128,0.014186666409174601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,2,32,64,0.014219733079274497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,4096,0.0796895980834961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,5120,0.09574613571166993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,6144,0.11329493522644044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,7168,0.12750399907430013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,8192,0.14507733980814616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,2560,0.055661865075429286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,3584,0.07088106473286947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,10240,0.17680853207906086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,3072,0.0636298656463623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,2048,0.04824533462524414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,1536,0.03941333293914795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,1024,0.03126293420791626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,256,0.019284266233444213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,768,0.027281065781911213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,512,0.02436800003051758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,32,0.01637440025806427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,12288,0.2083946704864502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,128,0.015618133544921874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,64,0.016074666380882265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,65536,16384,0.2739114761352539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,6144,0.0383135994275411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,7168,0.04215039809544881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,8192,0.05035306612650553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,10240,0.05470293362935384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,12288,0.06118933359781901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,2560,0.02483839988708496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,4096,0.03049813310305278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,5120,0.03723093271255493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,16384,0.07610773245493571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,3584,0.02889066735903422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,3072,0.026978133122126262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,2048,0.022648533185323082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,1536,0.02100800077120463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,768,0.01798293391863505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,1024,0.018758400281270345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,512,0.015260799725850423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,256,0.0151829332113266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,128,0.014983466267585755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,32,0.014947199821472168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,64,0.01495253344376882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,7168,0.03671253522237142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,6144,0.03416426579157512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,8192,0.040725334485371904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,10240,0.046562135219573975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,12288,0.05251306692759196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,16384,0.06477973461151124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,5120,0.03118399977684021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,4096,0.02794559995333354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,3584,0.026338134209314985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,1536,0.019573332866032918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,2560,0.022552533944447836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,2048,0.021116799116134642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,3072,0.024382932980855306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,512,0.015524267156918844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,1024,0.017911465962727864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,768,0.017017600933710735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,128,0.014544000228246054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,256,0.015037866433461508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,32,0.014761599898338317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,64,0.014784000317255654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,8192,0.0364522655804952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,7168,0.033079467217127484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,10240,0.040161065260569256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,12288,0.04493546485900879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,16384,0.058501334985097256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,6144,0.03113493323326111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,5120,0.027982934315999346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,3584,0.02341866691907247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,2560,0.02173759937286377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,4096,0.02525866627693176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,16384,65536,0.2682623863220215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,3072,0.022504534324010214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,1024,0.017324799299240114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,1536,0.018710400660832724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,2048,0.02002133329709371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,768,0.015371732910474143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,12288,65536,0.2185002644856771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,512,0.014571733276049294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,256,0.01511679987112681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,128,0.014722133676211039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,64,0.014651733636856078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,32,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,8192,0.03088639974594116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,10240,0.03499840100606282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,7168,0.029075199365615846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,16384,0.050261334578196204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,12288,0.03853653271993001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,6144,0.027167999744415285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,5120,0.025037866830825806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,3072,0.021150932709376017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,3584,0.022021333376566567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,2560,0.019882667064666747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,2048,0.019156267245610557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,4096,0.023257599274317423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,1024,0.015713066856066386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,1536,0.01795626680056254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,10240,65536,0.17609492937723797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,768,0.015545599659283958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,512,0.015240533153216043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,256,0.014916266997655234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,64,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,128,0.014703999956448874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,32,0.0147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,8192,0.03213653365770976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,6144,0.02830186684926351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,7168,0.030131200949350994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,16384,0.045746131738026934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,10240,0.035837864875793456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,12288,0.03842666546503703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,4096,0.022154666980107627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,3584,0.02155733307202657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,5120,0.02416960000991821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,3072,0.020154666900634766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,2560,0.019789866606394448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,1536,0.017806933323542277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,8192,65536,0.1482250690460205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,2048,0.018641066551208497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,768,0.01552959978580475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,1024,0.015384533007939658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,512,0.015000533064206442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,128,0.014647466937700906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,256,0.01490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,64,0.014751999576886495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,32,0.01456106702486674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,10240,0.030563199520111085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,12288,0.03480106592178345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,8192,0.028823467095692952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,7168,0.02547733386357625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,6144,0.0249781330426534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,5120,0.022772266467412313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,16384,0.039399464925130204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,7168,65536,0.1302783966064453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,4096,0.021171200275421142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,2048,0.0182805339495341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,3584,0.0210698664188385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,3072,0.019371734062830607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,2560,0.019053866465886436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,1536,0.0164490669965744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,512,0.015146666765213012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,1024,0.015333333611488342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,768,0.015313067038853965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,256,0.01479039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,128,0.014571733276049294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,64,0.014729600151379904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,32,0.014756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,6144,65536,0.11488853295644123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,12288,0.03256319959958394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,6144,0.022929066419601442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,16384,0.03778453270594279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,10240,0.028258132934570312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,8192,0.02590293288230896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,7168,0.024114133914311726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,5120,0.021538132429122926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,4096,0.02005866765975952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,3584,0.019782400131225585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,3072,0.01883093317349752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,1024,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,2560,0.018413867553075156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,2048,0.017401599884033205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,1536,0.015800533692042033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,128,0.01458133359750112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,768,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,512,0.015221333503723145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,256,0.014838400483131408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,64,0.014601600170135499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,32,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,5120,65536,0.10240426858266194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,6144,0.023491199811299643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,7168,0.024462932348251344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,16384,0.03339306513468425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,12288,0.02955840031305949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,5120,0.020258132616678873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,8192,0.025628799200057985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,10240,0.027637332677841187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,2048,0.01607146660486857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,4096,0.019179733594258626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,3072,0.018102399508158364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,3584,0.018731733163197838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,2560,0.017505067586898803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,1536,0.015629866719245912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,1024,0.01537493367989858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,768,0.01530880033969879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,256,0.014710399508476257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,512,0.014987732966740927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,64,0.014276267091433207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,128,0.01458026667435964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,65536,0.08367040157318115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,4096,32,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,12288,0.027693865696589152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,8192,0.02413439949353536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,7168,0.02385279933611552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,16384,0.03218560020128886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,10240,0.026129066944122314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,6144,0.022851200898488362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,5120,0.019720532496770225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,4096,0.0187285323937734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,3584,0.01830079952875773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,2560,0.016547200083732606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,3072,0.017786665757497152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,2048,0.015761066476504007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,768,0.01513706644376119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,1024,0.015385599931081137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,1536,0.01566506624221802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,256,0.014735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,512,0.014689067006111145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,128,0.014612266421318054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,32,0.014588800072669984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,65536,0.07481173674265543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3584,64,0.014547200004259745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,10240,0.025552000602086383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,12288,0.02677759925524394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,7168,0.023177599906921385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,8192,0.024210133155186973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,16384,0.030452267328898115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,5120,0.019337600469589232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,6144,0.022219733397165934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,4096,0.018257067600886027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,3072,0.01685439944267273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,3584,0.01813546617825826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,2560,0.01621119976043701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,2048,0.015677866339683533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,1536,0.015496533115704855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,512,0.014929067095120749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,1024,0.015347199638684592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,768,0.014939733346303306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,256,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,65536,0.06473173300425211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,128,0.014663466811180114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,64,0.014552533626556396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,3072,32,0.014476799964904785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,10240,0.02373440066973368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,12288,0.024936532974243163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,16384,0.027830400069554645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,4096,0.01811199982961019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,8192,0.02255679965019226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,7168,0.02204586664835612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,6144,0.021074134111404418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,5120,0.018897066513697304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,3584,0.017568000157674155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,2560,0.016307199994723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,3072,0.016950400670369466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,2048,0.015898666779200234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,1536,0.015637333194414772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,65536,0.05940693219502767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,256,0.014617600043614707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,512,0.014605866869290671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,1024,0.015282133221626281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,768,0.015069866180419922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,128,0.014633599917093912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,64,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2560,32,0.014628266294797262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,16384,0.02544959982236226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,12288,0.023191465934117635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,10240,0.023143466313680014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,8192,0.021385600169499717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,4096,0.016858667135238647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,7168,0.020950400829315187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,6144,0.020188800493876138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,5120,0.018662399053573607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,3584,0.016609066724777223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,3072,0.01655359963575999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,2560,0.016264533003171287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,2048,0.01586560010910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,65536,0.05102293491363526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,1536,0.015705600380897522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,1024,0.015335466464360556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,768,0.01511146624883016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,512,0.014780799547831217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,64,0.014502400159835815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,256,0.014774399995803832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,32,0.0146506667137146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,2048,128,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,8192,0.020320000251134236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,10240,0.02119893431663513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,16384,0.023744000991185506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,12288,0.02212160031000773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,6144,0.019371734062830607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,7168,0.020252799987792967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,5120,0.0198634664217631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,4096,0.01919680039087931
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,2048,0.01700906753540039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,3584,0.018959999084472656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,3072,0.018212266763051353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,2560,0.01762239933013916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,65536,0.04209280014038086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,1536,0.01567146678765615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,1024,0.015229866902033488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,512,0.01488746702671051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,768,0.0151146670182546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,256,0.014361600081125895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,128,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,64,0.014564266800880432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1536,32,0.014640000462532044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,12288,0.0201749324798584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,10240,0.020119466384251914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,16384,0.021733333667119346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,8192,0.01840106646219889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,7168,0.018525866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,4096,0.024615466594696045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,6144,0.018514132499694823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,3584,0.023667200406392416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,65536,0.033115732669830325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,5120,0.027124265829722088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,3072,0.022094933191935222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,2560,0.02065920035044352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,1536,0.015629866719245912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,768,0.01504853367805481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,2048,0.015987199544906617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,1024,0.015194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,512,0.014846932888031007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,256,0.014802133043607077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,64,0.014237866799036662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,128,0.014587733149528503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,1024,32,0.014532267053922018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,16384,0.020803199211756388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,12288,0.018905599912007652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,5120,0.020358399550120036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,10240,0.018309332927068076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,8192,0.018692266941070557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,7168,0.018620800971984864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,6144,0.018402133385340372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,65536,0.03020693262418111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,3072,0.0179967999458313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,4096,0.019296000401178993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,3584,0.018663465976715088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,2048,0.016952532529830932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,2560,0.01744640072186788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,1536,0.016057599584261575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,512,0.015031466881434122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,1024,0.015380266308784484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,768,0.015411200126012168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,256,0.014907733599344889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,128,0.014641066392262777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,16384,0.019237333536148073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,64,0.014786133170127868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,768,32,0.014604799946149192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,12288,0.018313600619633993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,10240,0.018780799706776936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,65536,0.02611306707064311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,8192,0.018566399812698364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,7168,0.018950400749842326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,6144,0.018355200688044228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,5120,0.018181333939234413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,4096,0.01729493339856466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,3072,0.016821332772572837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,3584,0.016985599199930826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,2560,0.0162090669075648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,1024,0.015252266327540079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,2048,0.016057599584261575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,1536,0.015592533349990844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,256,0.01479680041472117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,768,0.015213867028554281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,512,0.014873600006103516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,32,0.014549332857131957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,128,0.01454080045223236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,512,64,0.014386133352915446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,65536,0.022817067305246987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,16384,0.018411733706792197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,7168,0.018860799074172974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,12288,0.018602667252222697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,10240,0.020119466384251914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,8192,0.019565866390864054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,5120,0.017745065689086913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,6144,0.018580265839894614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,3072,0.016420267025629678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,3584,0.016797866423924765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,4096,0.01715839902559916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,2560,0.016277333100636802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,1024,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,2048,0.015871999661127727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,1536,0.015406933426856995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,768,0.01493333379427592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,512,0.014791466792424521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,64,0.01442026694615682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,256,0.014597333470980325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,128,0.014556800325711569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,256,32,0.01422826647758484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,8192,0.018295466899871826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,65536,0.020292266209920248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,16384,0.0194048007329305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,12288,0.018203733364741008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,10240,0.018668800592422485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,7168,0.01884373426437378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,6144,0.018256000677744546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,4096,0.016909867525100708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,5120,0.01760960022608439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,3584,0.016919465859731038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,3072,0.016522666811943053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,768,0.015144532918930054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,2560,0.016103466351826988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,2048,0.015848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,1536,0.01548693378766378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,1024,0.015218133727709452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,512,0.014490666985511779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,64,0.014439466595649719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,256,0.01472106675306956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,128,0.014401066303253173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,128,32,0.014331733187039694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,65536,0.019086933135986327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,16384,0.018436266978581747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,12288,0.0180351992448171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,8192,0.0181877334912618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,10240,0.01858453353246053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,7168,0.018720000982284546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,6144,0.018467199802398682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,2560,0.01626240015029907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,4096,0.01704853375752767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,5120,0.017669334014256795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,3584,0.016782933473587038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,3072,0.01648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,2048,0.015622400244077048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,768,0.014995200435320535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,1536,0.015633066495259605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,1024,0.014806399742762247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,512,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,256,0.01434879998366038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,65536,0.01991893251736959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,32,0.013946666320164999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,128,0.014212266604105631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,64,64,0.014362667004267374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,16384,0.018304000298182167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,12288,0.018132267395655315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,7168,0.018577067057291667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,10240,0.01849386692047119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,6144,0.01813973387082418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,8192,0.018193066120147705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,5120,0.01758293310801188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,4096,0.017004799842834473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,2560,0.016311466693878174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,3584,0.016753067572911583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,3072,0.016193067034085594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,2048,0.01556373337904612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,1536,0.015108266472816467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,1024,0.014912000298500061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,256,0.014439466595649719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,768,0.014719999829928079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,512,0.014616533120473226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,32,0.014242133498191834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,128,0.014378666877746582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8,1,32,64,0.014251733819643656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,4096,3.9676778157552084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,5120,4.838020324707031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,6144,5.78734385172526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,7168,6.690674336751302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,8192,7.711297098795574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,10240,9.796776326497396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,3584,3.459460194905599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,3072,3.0961451212565105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,2560,2.5979466756184895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,2048,2.1024159749348956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,1536,1.7075178782145184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,256,0.7764821370442708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,12288,11.331755574544271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,128,0.7123711903889973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,512,0.9329727808634439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,1024,1.2472447713216146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,768,1.1111466725667318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,16384,16.966358439127603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,6144,1.4398581186930337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,7168,1.6629802703857421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,8192,1.89453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,10240,2.332469431559245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,12288,2.87027104695638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,5120,1.2031370798746743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,3584,0.871732266743978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,4096,0.9687050501505533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,16384,3.8105013529459635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,2560,0.630184555053711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,3072,0.7404042561848958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,1536,0.4407583872477214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,2048,0.5390368143717448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,256,0.20390186309814454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,128,0.18670399983723956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,512,0.24271893501281738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,768,0.3046602567036947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,1024,0.33969812393188475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,6144,1.0962708791097007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,7168,1.2681973775227866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,8192,1.3933877309163412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,10240,1.8046187082926433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,12288,2.1740331013997394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,16384,3.0291989644368487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,4096,0.7389354705810547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,3584,0.6467871983846029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,5120,0.9061162948608399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,3072,0.5688970565795899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,2560,0.47750186920166016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,2048,0.41399253209431963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,768,0.22838826179504396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,256,0.15445866584777831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,512,0.18742720286051434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,1536,0.33732372919718423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,1024,0.2561471939086914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,128,0.140993070602417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,7168,1.0417493184407554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,8192,1.1672767639160155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,10240,1.474401092529297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,12288,1.7818368275960286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,16384,2.4500661214192707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,5120,0.7399189631144206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,4096,0.6233738581339519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,6144,0.8954197565714518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,3584,0.5510282516479492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,65536,15.431488037109375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,3072,0.47660268147786455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,2560,0.4040063858032227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,1536,0.2834634780883789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,2048,0.35180479685465493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,1024,0.21829546292622887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,512,0.15658772786458333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,768,0.1929365317026774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,256,0.12968213558197023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,128,0.11875200271606445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,7168,0.8465034484863281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,65536,12.026518758138021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,8192,0.9900565465291342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,10240,1.228265635172526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,12288,1.4950847625732422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,16384,2.089573287963867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,4096,0.5106506665547689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,5120,0.6215167999267578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,3584,0.45556052525838214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,6144,0.7376960118611653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,3072,0.3976832071940104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,2560,0.33940372467041013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,2048,0.2921066602071126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,1536,0.23774933815002441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,512,0.13039999802907307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,1024,0.18133974075317383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,768,0.15949865976969402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,256,0.1083232005437215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,128,0.09839253425598145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,65536,10.40245361328125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,7168,0.766322135925293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,10240,1.0994859059651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,8192,0.8768128077189127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,12288,1.3392436981201172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,16384,1.7922538757324218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,6144,0.6729824066162109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,5120,0.5517589569091796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,4096,0.4595466613769531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,3584,0.4104906717936198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,3072,0.35582081476847327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,2560,0.3036213239034017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,2048,0.2650858720143636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,768,0.14840319951375325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,512,0.11863466898600261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,1536,0.21617813110351564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,1024,0.16553707122802735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,256,0.09726080099741617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,128,0.08821226755777994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,65536,8.232096862792968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,7168,0.6789663950602214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,8192,0.7584661483764649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,10240,0.9637450536092123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,12288,1.154638926188151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,16384,1.57490234375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,5120,0.4904757181803386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,3584,0.35760533014933266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,4096,0.39863573710123695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,6144,0.5856682459513347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,3072,0.31144959131876626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,2560,0.26472105979919436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,512,0.10135359764099121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,2048,0.2311807950337728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,1536,0.18696533838907878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,1024,0.14198400179545084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,768,0.1252874692281087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,256,0.08328426678975423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,128,0.07629333337148031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,65536,7.71859842936198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,7168,0.5748682657877604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,10240,0.8238154729207358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,8192,0.6515594482421875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,12288,0.9948064168294272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,16384,1.3169780731201173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,5120,0.4156565348307292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,3584,0.30869547526041663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,4096,0.3499605178833008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,6144,0.505353609720866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,3072,0.2692170778910319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,2560,0.23053973515828452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,768,0.11051092942555744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,512,0.08856960137685141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,2048,0.19769919713338216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,1536,0.16288533210754394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,1024,0.12480639616648356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,256,0.07191039721171061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,128,0.06501973470052083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,65536,6.365333557128906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,7168,0.48639891942342117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,8192,0.5458741505940755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,10240,0.684878921508789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,12288,0.8093610763549804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,16384,1.118515141805013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,5120,0.3509567896525065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,6144,0.4195061365763347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,4096,0.2884288152058919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,3584,0.25866667429606116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,3072,0.22534292538960776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,2560,0.1912629286448161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,768,0.08941439787546793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,1536,0.13525652885437012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,2048,0.16585920651753744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,1024,0.10218239625295003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,512,0.07196373144785563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,256,0.058671998977661136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,128,0.053010133902231846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,65536,5.62680409749349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,8192,0.4929087956746419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,7168,0.43889385859171554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,10240,0.6091306686401368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,12288,0.7449034372965495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,16384,1.0144938786824544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,6144,0.38261439005533854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,3584,0.23464533487955727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,4096,0.2647018591562907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,5120,0.31520427068074547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,3072,0.2047541300455729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,2560,0.1760682741800944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,2048,0.15076692899068195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,768,0.0821664015452067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,1536,0.12275306383768718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,512,0.06593279838562012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,1024,0.09409920374552408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,256,0.05321066776911417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,128,0.04797973235448201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,65536,4.497529602050781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,7168,0.39451093673706056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,10240,0.552725346883138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,8192,0.447435728708903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,12288,0.6601696014404297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,16384,0.8781951904296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,6144,0.33657706578572594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,5120,0.28604265848795574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,4096,0.23682986895243324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,3584,0.21134719848632813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,3072,0.18350720405578613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,2560,0.15611413319905598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,1536,0.10982720057169597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,768,0.07294399738311767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,2048,0.13310506343841552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,1024,0.08378667036692301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,512,0.05819413264592489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,128,0.04202346801757813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,256,0.04688640038172404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,65536,4.150808461507162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,7168,0.34301973978678385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,8192,0.3950624148050944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,10240,0.48192211786905925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,12288,0.5877258936564128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,16384,0.7642069498697917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,6144,0.29814720153808594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,5120,0.249726931254069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,4096,0.21070186297098795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,3584,0.18492053349812826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,3072,0.16215786933898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,2560,0.13738133112589518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,768,0.06359253327051798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,2048,0.11791893641153972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,1536,0.09581226507822672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,1024,0.07394239902496338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,512,0.05048319896062216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,256,0.04062186479568482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,128,0.03597653309504191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,65536,3.614703877766927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,7168,0.295088005065918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,10240,0.41658773422241213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,8192,0.33585707346598304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,12288,0.5025162696838379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,16384,0.6612735748291015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,6144,0.2581546624501546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,3584,0.15958399772644044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,5120,0.21431999206542968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,4096,0.17936533292134602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,3072,0.1402517318725586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,2560,0.11760640144348145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,2048,0.10035093625386556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,512,0.04238826831181844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,768,0.05354453325271606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,1024,0.06206933259963989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,1536,0.08103253046671549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,128,0.02974826693534851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,256,0.03391253153483073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,65536,3.3298985799153646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,7168,0.2514592011769613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,10240,0.34920425415039064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,8192,0.2846720059712728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,12288,0.4204821268717448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,16384,0.559878412882487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,6144,0.21941547393798827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,4096,0.15211626688639324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,5120,0.1835466702779134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,3584,0.13553813298543294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,3072,0.11751786867777507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,2048,0.08483520348866781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,2560,0.10040853023529053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,768,0.04471786816914876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,512,0.0353983998298645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,1024,0.052520533402760826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,1536,0.06841493447621663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,128,0.023487999041875204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,256,0.027151999870936076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,65536,2.781720479329427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,7168,0.20816747347513834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,10240,0.28742507298787434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,8192,0.23478612899780274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,12288,0.3435061454772949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,16384,0.4539914766947429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,6144,0.17918507258097333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,5120,0.1515498638153076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,4096,0.12369813124338787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,3584,0.11056426366170247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,3072,0.0957856019337972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,2560,0.08113919893900554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,2048,0.0683573325475057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,1536,0.055010131994883214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,768,0.034701867898305254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,1024,0.04145386616388957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,512,0.02672106623649597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,256,0.020549333095550536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,128,0.01771413286526998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,65536,2.2984886169433594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,7168,0.18496640523274738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,8192,0.2094879945119222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,10240,0.25639146169026694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,12288,0.3063946723937988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,6144,0.16061867078145345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,16384,0.40555626551310225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,5120,0.1342367966969808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,3584,0.0973855972290039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,4096,0.10989867051442463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,3072,0.08455680211385092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,2560,0.07178346316019693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,2048,0.060229333241780605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,768,0.02924373348553975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,256,0.017326933145523072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,512,0.02299519975980123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,1536,0.04851733446121216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,1024,0.03509866793950399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,128,0.014667733510335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,65536,1.8272244771321613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,7168,0.1618805408477783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,8192,0.18296640714009602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,10240,0.2273621400197347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,12288,0.2697098731994629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,6144,0.14010666211446127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,16384,0.3578229268391927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,4096,0.09588159720102946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,5120,0.1174623966217041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,3584,0.0844970703125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,3072,0.07336959838867188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,2560,0.06253759860992432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,2048,0.052331733703613284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,768,0.024446932474772136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,1536,0.041715200742085776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,1024,0.02932586669921875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,512,0.01918826699256897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,256,0.014357333381970724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,128,0.011892267068227132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,65536,1.638701883951823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,7168,0.1395978609720866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,8192,0.1579957326253255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,10240,0.19537386894226075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,12288,0.23526612917582193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,6144,0.11947093009948731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,16384,0.3074624061584473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,5120,0.10005546410878499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,4096,0.08170133431752523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,3584,0.07209280331929525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,3072,0.06292159954706827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,2560,0.05345600048700968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,2048,0.04442773262659709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,1536,0.03412053187688192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,768,0.01967573364575704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,1024,0.024012800057729086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,512,0.015485866864522298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,128,0.008801066875457763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,256,0.01102293332417806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,65536,1.381831487019857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,7168,0.13105493386586506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,8192,0.14981013933817547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,10240,0.1860213279724121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,12288,0.2217408021291097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,6144,0.11280426979064942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,16384,0.2937120119730631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,4096,0.07671253681182862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,5120,0.09401386578877767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,3584,0.06819199721018473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,2048,0.04079999923706055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,2560,0.050429864724477136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,768,0.017802667617797852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,1024,0.022009599208831786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,3072,0.058822401364644374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,1536,0.030535467465718585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,512,0.013906133174896241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,256,0.009900800387064616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,128,0.007740800082683563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,65536,1.1928170522054038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,6144,2.8221163431803387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,7168,3.315779113769531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,8192,3.7604288736979163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,5120,2.3200106302897137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,10240,4.717597961425781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,4096,1.9328394571940106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,12288,5.723243713378906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,3584,1.7161322275797528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,3072,1.4416778564453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,2048,1.049400520324707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,2560,1.2350826263427734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,1024,0.6250314712524414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,256,0.39071467717488606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,128,0.3596885363260905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,512,0.46500266393025713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,16384,8.052132161458333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,1536,0.8343797047932944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,768,0.5634677251180013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,65536,1.1307615915934244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,6144,0.6875690460205078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,7168,0.8238858540852865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,8192,0.9287722905476888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,10240,1.140219751993815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,5120,0.589852778116862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,12288,1.4126207987467447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,2560,0.31711788177490235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,3072,0.37611093521118166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,3584,0.4333567937215169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,4096,0.4774133364359538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,16384,1.9188779195149739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,2048,0.28001813888549804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,1536,0.22775999704996744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,1024,0.1739456017812093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,768,0.15734400749206542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,256,0.10514559745788574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,512,0.12511253356933594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,128,0.09886186917622884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,7168,0.6254421234130859
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,6144,0.5340991973876953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,8192,0.7084927876790365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,12288,1.0424757639567057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,10240,0.8880885442097982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,16384,1.4731061299641928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,4096,0.36804587046305337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,5120,0.4492608070373535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,2560,0.24790827433268228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,3584,0.3296031951904297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,3072,0.28663466771443685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,256,0.08119253317515054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,1536,0.17558399836222333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,2048,0.21449386278788246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,768,0.12004053592681885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,512,0.09725866317749024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,1024,0.133681058883667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,128,0.07601919968922934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,8192,0.6066432317097982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,7168,0.5193663914998372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,10240,0.7391178766886394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,12288,0.9067978541056314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,16384,1.2087083180745444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,5120,0.3757386525472005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,4096,0.31282240549723306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,6144,0.4586751937866211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,3584,0.28405332565307617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,3072,0.24530453681945802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,2048,0.18149545987447102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,65536,7.718894958496094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,1024,0.11428693135579426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,2560,0.20789119402567544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,1536,0.14933546384175617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,256,0.06947306791941324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,768,0.1025696039199829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,128,0.06427839994430543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,512,0.0825482686360677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,7168,0.42758932113647463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,8192,0.49782826105753586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,10240,0.6056917190551758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,65536,6.127567036946615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,12288,0.7249546686808268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,6144,0.3719146728515625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,16384,1.0137749354044596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,5120,0.3134197235107422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,3072,0.2007274627685547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,3584,0.2343221346537272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,4096,0.25880533854166665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,1536,0.12354666392008465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,2560,0.17149546941121419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,2048,0.14994880358378093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,1024,0.09442346890767415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,768,0.083897598584493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,512,0.06827519734700521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,256,0.05655146837234497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,128,0.05274666547775268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,8192,0.4418496131896973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,65536,4.948415120442709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,7168,0.3823786735534668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,10240,0.5415487925211588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,12288,0.6614666620890299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,16384,0.8708735783894858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,3584,0.20697387059529623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,4096,0.2316373348236084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,5120,0.27728853225708006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,6144,0.33626880645751955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,3072,0.18267946243286132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,1536,0.11078613599141438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,2560,0.15478399594624836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,2048,0.13442452748616535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,512,0.062370133399963376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,1024,0.08645866711934408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,768,0.07618239720662436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,128,0.047133866945902506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,256,0.051227732499440515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,7168,0.33369919459025066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,8192,0.3802805264790853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,65536,3.9854751586914063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,10240,0.47682987848917646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,12288,0.5609376271565755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,4096,0.20136747360229493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,16384,0.789523188273112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,3584,0.1820053259531657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,5120,0.24210240046183268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,6144,0.2912106513977051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,2048,0.11886400381724041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,3072,0.15753919283548992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,2560,0.1343509356180827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,256,0.04422826766967773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,1536,0.0970911979675293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,512,0.053220268090566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,768,0.06534719864527384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,1024,0.07401173114776612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,128,0.041162665685017905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,65536,3.5798517862955728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,8192,0.32748053868611654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,7168,0.28799146016438804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,10240,0.40458879470825193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,12288,0.4958655993143718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,5120,0.21033493677775064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,16384,0.6607199986775716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,6144,0.25349013010660804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,3584,0.15650026003519696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,4096,0.1755488077799479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,3072,0.13814187049865723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,2048,0.10205226739247639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,2560,0.11775893370310467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,512,0.04679893255233765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,1024,0.06522026856740316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,768,0.05749333302179972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,1536,0.08391893704732259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,256,0.038574934005737305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,128,0.03535679976145427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,65536,3.142308298746745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,10240,0.3417450586954752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,7168,0.2421621322631836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,8192,0.2778677304585775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,12288,0.4039125442504883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,6144,0.21200853983561196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,16384,0.5459274927775065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,4096,0.1466986656188965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,5120,0.17715733846028645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,3584,0.132041597366333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,2560,0.09824639956156413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,3072,0.11402239799499511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,1536,0.0702303965886434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,2048,0.0850879987080892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,1024,0.05332266489664713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,768,0.047244799137115476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,512,0.03866986831029256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,256,0.03169599970181783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,128,0.029157332579294842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,7168,0.22006613413492837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,65536,2.656666564941406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,8192,0.2479093392690023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,10240,0.30811945597330725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,12288,0.37387733459472655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,16384,0.4930272102355957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,5120,0.1607221285502116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,6144,0.19159679412841796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,4096,0.133622407913208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,3072,0.10460480054219563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,3584,0.1198197364807129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,2560,0.09005013306935629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,1536,0.06405653158823649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,1024,0.04979093472162883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,768,0.043416531880696614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,2048,0.0775434652964274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,512,0.035403732458750406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,256,0.029024000962575274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,128,0.026074665784835815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,65536,2.282885233561198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,7168,0.1977962652842204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,10240,0.2752959887186686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,8192,0.2240682601928711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,12288,0.3273653348286947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,16384,0.4411360104878743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,6144,0.17361706097920734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,3584,0.10681280295054119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,4096,0.12091413338979085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,5120,0.1442954699198405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,3072,0.09310932954152426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,2560,0.08008000055948893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,2048,0.0695904016494751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,1536,0.057044267654418945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,768,0.03849706649780273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,1024,0.044529068470001223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,512,0.03135573267936707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,128,0.023181867599487305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,256,0.025298132499059038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,8192,0.19944106737772624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,65536,2.011964797973633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,7168,0.17591147422790526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,10240,0.24234132766723632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,12288,0.2917311986287435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,5120,0.12741226355234783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,6144,0.1533631960550944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,16384,0.38205547332763673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,4096,0.10668266614278157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,3584,0.09406186739603678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,3072,0.08212160269419352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,1536,0.05053546826044718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,2560,0.0707850694656372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,2048,0.06153920094172159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,1024,0.03870720068613688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,768,0.033326933781305954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,128,0.020062933365503945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,512,0.027038933833440144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,256,0.021721599499384563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,65536,1.7823487599690755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,8192,0.17249280611673992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,7168,0.15051199595133463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,10240,0.20907200177510582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,12288,0.25005653699239094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,16384,0.3370175997416178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,6144,0.1313653310139974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,4096,0.091429336865743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,3584,0.08111253579457602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,5120,0.11058453718821208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,3072,0.07093226909637451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,1024,0.03259733319282532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,1536,0.04323199987411499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,2560,0.06133759816487631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,2048,0.05281493266423544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,128,0.0169322669506073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,768,0.028126933177312213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,512,0.022529067595799764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,256,0.01837973395983378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,65536,1.583075205485026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,8192,0.14592960675557454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,7168,0.1288650671641032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,10240,0.1798357327779134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,12288,0.21311039924621583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,16384,0.2793888092041016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,5120,0.09390400250752767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,6144,0.11242559750874836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,4096,0.07749226888020834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,3072,0.06041813294092814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,3584,0.0687328020731608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,768,0.023525333404541014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,2560,0.052414933840433754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,2048,0.044546135266621906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,1024,0.02720959981282552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,1536,0.035869868596394856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,512,0.019351466496785482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,256,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,128,0.014243200421333313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,65536,1.3560970306396485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,8192,0.1201632022857666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,7168,0.10582719643910725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,10240,0.14842665990193685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,12288,0.17664747238159179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,16384,0.23089812596638998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,6144,0.09235626856486003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,4096,0.06354879935582479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,2560,0.0424778660138448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,5120,0.07701226870218912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,3584,0.05645440022150675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,3072,0.04919039805730184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,2048,0.0356544017791748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,1024,0.021824000279108684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,1536,0.028699733813603717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,768,0.018847999970118205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,512,0.015474133690198264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,256,0.012251733740170797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,128,0.01123413344224294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,65536,1.1245269775390625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,7168,0.10005226929982503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,8192,0.11579413414001465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,10240,0.14123412768046062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,12288,0.1660213311513265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,6144,0.08638613224029541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,16384,0.22001280784606933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,3072,0.04547306696573893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,4096,0.05906026760737101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,5120,0.07351893583933512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,3584,0.053206400076548255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,2560,0.04003413518269856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,2048,0.03238720099131266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,65536,0.8841215769449869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,1536,0.025769599278767902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,768,0.016504533092180886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,512,0.013589333494504294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,256,0.010418132940928141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,1024,0.019781333208084107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,128,0.009403733412424724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,7168,0.08227199713389079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,10240,0.11574719746907551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,6144,0.07089599768320719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,8192,0.09311467011769613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,12288,0.13833173116048175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,5120,0.060083198547363284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,16384,0.18231040636698406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,3072,0.0380181352297465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,3584,0.04413653214772542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,4096,0.049634134769439696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,2560,0.03253759940465291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,2048,0.02681279977162679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,1536,0.021695999304453532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,1024,0.016665599743525186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,512,0.011694932977358501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,256,0.009238400061925252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,768,0.01416853368282318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,128,0.008308266599973042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,65536,0.8464640299479166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,6144,0.06322240034739177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,8192,0.08221440315246582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,7168,0.07283626397450765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,10240,0.10199466546376545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,12288,0.12256533304850262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,16384,0.16140586535135906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,3072,0.03354346752166748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,4096,0.044096000989278156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,5120,0.05377493302027384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,3584,0.04001386562983195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,2560,0.02880000074704488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,2048,0.0237610658009847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,65536,0.6921098709106446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,512,0.010198400417963664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,1536,0.019374932845433554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,768,0.01232426663239797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,1024,0.014594133694966635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,256,0.008110933502515157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,128,0.007246933380762736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,6144,0.06139200131098429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,7168,0.07103466987609863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,8192,0.07996479670206705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,10240,0.09887893199920654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,12288,0.11828052997589111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,16384,0.15637866655985516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,5120,0.051787734031677246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,4096,0.04248426755269368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,3072,0.031420799096425374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,3584,0.03745280106862386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,2560,0.027217066287994383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,65536,0.6238805135091146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,2048,0.022275199492772423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,512,0.009347200393676758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,256,0.00724480003118515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,768,0.011412266890207927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,1024,0.013623467087745667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,128,0.006471466521422069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,1536,0.018154666821161906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,6144,1.3794347127278646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,5120,1.1187391916910807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,7168,1.6155403137207032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,8192,1.806297556559245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,65536,0.6007477442423503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,10240,2.488932291666667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,3584,0.8315722783406576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,3072,0.7051125208536784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,4096,0.9236096064249674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,2048,0.5161386807759603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,2560,0.5914922714233398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,256,0.20032426516215004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,12288,2.795233154296875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,1024,0.32194560368855796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,1536,0.4253823916117351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,512,0.23677333196004233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,768,0.2891829490661621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,128,0.18250880241394044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,16384,4.037560526529948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,6144,0.34845867156982424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,7168,0.4049962679545085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,8192,0.4583402633666992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,10240,0.5804757436116537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,12288,0.7049898783365885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,4096,0.24231146176656088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,5120,0.29481598536173503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,3072,0.1889450709025065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,2560,0.16336320241292318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,3584,0.21671147346496583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,16384,0.9043999989827475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,2048,0.14190826416015626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,1536,0.11657813390096028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,512,0.06596693197886148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,768,0.08175679842631021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,1024,0.09073812961578369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,256,0.05648213227589925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,128,0.05258026520411173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,7168,0.30942185719807946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,6144,0.26759678522745767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,10240,0.4315530776977539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,8192,0.3562687873840332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,16384,0.7113546371459961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,12288,0.5325813293457031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,3584,0.16660374005635578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,4096,0.18579626083374023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,5120,0.2239978631337484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,2048,0.11005760033925374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,2560,0.1243776003519694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,3072,0.14629119237263996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,1536,0.0902517318725586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,512,0.05119040012359619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,768,0.06284373203913371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,256,0.043680000305175784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,1024,0.06982186635335287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,128,0.04063040018081665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,10240,0.36614398956298827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,7168,0.2642602602640788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,12288,0.4430826822916667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,8192,0.2972223917643229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,65536,3.88887685139974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,16384,0.608019193013509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,6144,0.22774079640706382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,4096,0.15927467346191407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,3584,0.1416159947713216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,5120,0.19094613393147786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,3072,0.12525440057118734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,2048,0.0929685354232788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,2560,0.10751893520355224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,1536,0.07755626837412516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,128,0.03495039939880371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,1024,0.059546665350596106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,768,0.05312533378601074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,512,0.04384426673253377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,256,0.03742080132166545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,65536,2.957922108968099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,7168,0.21494506200154623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,8192,0.24437333742777506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,12288,0.35865599314371743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,10240,0.3043498675028483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,6144,0.19031359354654948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,16384,0.49740800857543943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,5120,0.15679359436035156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,3584,0.11680106321970622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,4096,0.13100799719492595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,2560,0.08828799724578858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,3072,0.10316906770070393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,2048,0.07762986818949381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,1536,0.06400746504465739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,768,0.044147201379140216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,1024,0.04954986572265625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,256,0.031104000409444173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,512,0.03659733136494954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,128,0.028590933481852217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,65536,2.368871561686198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,8192,0.2213535944620768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,6144,0.16678826014200848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,7168,0.19512426058451335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,10240,0.2718805313110352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,12288,0.32062292098999023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,16384,0.4230047861735026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,5120,0.14232640266418456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,4096,0.11810986995697022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,3584,0.10609493255615235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,2560,0.07916906674702963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,3072,0.09258453051249185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,2048,0.06951680183410644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,768,0.04048639933268229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,1024,0.04547839959462484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,1536,0.058023468653361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,512,0.03356800079345703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,256,0.028253867228825884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,128,0.025889066855112712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,65536,1.9809023539225261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,8192,0.1933568000793457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,10240,0.2334122657775879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,7168,0.16885013580322267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,12288,0.28586241404215496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,6144,0.1477290630340576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,16384,0.3774229367574056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,4096,0.1027232011159261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,5120,0.12437547047932943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,2048,0.06080000003178915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,3584,0.09272747039794922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,2560,0.06960639953613282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,3072,0.08121919631958008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,1536,0.05052479902903238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,768,0.03486400047938029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,1024,0.03919680118560791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,65536,1.6887541453043622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,256,0.024629332621892295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,512,0.028962133328119914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,128,0.0221781333287557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,6144,0.1287722667058309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,10240,0.20330452919006348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,7168,0.1465354601542155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,8192,0.1662314732869466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,12288,0.24654720624287924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,3584,0.08051306406656901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,16384,0.3196330706278483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,5120,0.10867199897766114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,4096,0.08973226547241211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,3072,0.0706496000289917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,2560,0.06036586761474609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,1536,0.044455464680989584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,2048,0.053284267584482826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,128,0.019349332650502524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,1024,0.0347978671391805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,512,0.02553279995918274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,768,0.030643200874328612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,65536,1.5455231984456381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,256,0.02129813234011332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,8192,0.13982399304707843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,7168,0.12455466588338215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,6144,0.10795520146687825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,10240,0.17127466201782227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,16384,0.27204694747924807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,12288,0.20646185874938966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,3584,0.06712426344553629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,2560,0.0511573314666748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,5120,0.0900223970413208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,2048,0.04458026488622029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,4096,0.07588906288146972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,3072,0.059519998232523595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,1024,0.028743465741475422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,1536,0.03700480063756307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,768,0.0253930668036143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,512,0.02072640061378479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,256,0.017968000968297322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,128,0.016426666577657064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,65536,1.2708746592203775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,7168,0.12589759826660157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,8192,0.1419424057006836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,12288,0.20947839419047037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,6144,0.10643733342488607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,10240,0.17708266576131185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,16384,0.2806463877360026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,65536,1.065769577026367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,3072,0.057366398970286045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,4096,0.07549760341644288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,5120,0.09068160057067871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,2560,0.050323200225830075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,3584,0.06532693306605021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,2048,0.040932265917460124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,1536,0.03299840092658997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,128,0.012216533223787945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,768,0.020916267236073812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,256,0.014376533031463624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,512,0.01706026593844096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,1024,0.025832533836364746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,6144,0.08687466780344645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,8192,0.11554346879323323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,16384,0.21867626508076987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,12288,0.1670090675354004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,7168,0.10126187006632488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,10240,0.14176747004191081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,65536,1.0985963185628256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,2048,0.03677759965260823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,3072,0.04849706490834554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,5120,0.07360959847768148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,3584,0.05559253295262655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,4096,0.06219840049743652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,2560,0.04196586608886719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,256,0.014967466394106546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,1536,0.030395734310150146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,1024,0.023545600970586143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,768,0.02076586683591207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,128,0.013498666882514953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,512,0.017321600516637167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,12288,0.16945279439290364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,6144,0.08583146731058756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,10240,0.1446111996968587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,7168,0.1018719991048177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,8192,0.11938453515370687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,16384,0.22284480730692544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,65536,0.8673290888468423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,3072,0.04647359848022461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,4096,0.060158932209014894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,2560,0.040508798758188885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,5120,0.07578240235646566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,3584,0.05590720176696777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,2048,0.034142935276031496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,1536,0.02661866744359334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,256,0.011708799997965496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,128,0.00997866690158844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,512,0.013936000068982443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,768,0.016847999890645345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,1024,0.02142080068588257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,8192,0.08785386880238852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,65536,0.8882741292317708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,7168,0.07660053571065267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,10240,0.10866666634877523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,16384,0.1706720034281413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,6144,0.06733012994130452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,12288,0.1281386693318685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,1536,0.022779732942581177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,3584,0.04256960153579712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,3072,0.03763093153635661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,5120,0.0567413330078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,512,0.013378133376439413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,4096,0.04750613371531169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,2560,0.03176746765772502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,2048,0.027982934315999346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,256,0.011643733580907185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,1024,0.01801066597302755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,768,0.015892266233762106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,128,0.01051520009835561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,12288,0.11848213672637939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,10240,0.0990015983581543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,6144,0.06106559832890829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,7168,0.07031040191650391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,8192,0.08183786869049073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,16384,0.1566186745961507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,65536,0.6395562489827473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,4096,0.04302719831466675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,3072,0.03308266599973043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,2560,0.02927680015563965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,3584,0.039033599694569904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,5120,0.05327786604563395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,2048,0.024999467531840007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,1536,0.01983039975166321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,256,0.009831466277440389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,128,0.008480000495910644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,512,0.011432533462842304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,768,0.013307733337084451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,1024,0.015516799688339234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,8192,0.06143146753311157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,7168,0.0541536013285319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,16384,0.11808746655782063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,65536,0.6017077128092448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,6144,0.047042131423950195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,10240,0.0752895991007487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,12288,0.09020160039265951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,5120,0.040565331776936844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,4096,0.03287786642710368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,2560,0.022695465882619222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,3584,0.029684267441431683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,3072,0.026267733176549273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,1024,0.012803199887275695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,1536,0.016182399789492288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,2048,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,128,0.007567999760309856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,768,0.011377066373825073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,512,0.009850666920344035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,256,0.008594133456548055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,10240,0.07134079933166504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,8192,0.058361601829528806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,7168,0.0513152003288269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,12288,0.08466453552246093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,6144,0.04432320197423299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,16384,0.11362773577372234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,5120,0.03845653136571248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,65536,0.45082346598307294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,4096,0.030814933776855468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,2048,0.018157867590586345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,3584,0.027688533067703247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,3072,0.02421440084775289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,2560,0.021569067239761354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,1024,0.011974400281906128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,1536,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,512,0.008820266524950663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,768,0.010227200388908387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,256,0.007658666869004567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,128,0.006628266473611195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,10240,0.06250666777292888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,6144,0.039018666744232176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,12288,0.07408426602681478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,7168,0.045235200723012285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,8192,0.05181866486867269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,16384,0.09877759615580241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,65536,0.4269717216491699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,3072,0.02169493238131205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,5120,0.0335594654083252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,4096,0.027349332968393963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,2048,0.016484266519546507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,3584,0.025252266724904375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,2560,0.01949653426806132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,1536,0.013948800166447959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,768,0.00951573352018992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,128,0.006585599978764851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,256,0.007565866907437642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,512,0.008374399940172831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,1024,0.010870400071144103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,65536,0.37323627471923826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,16384,0.0919871966044108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,6144,0.036524800459543864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,7168,0.042481064796447754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,8192,0.04829119841257731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,10240,0.0592522660891215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,12288,0.06971413294474284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,5120,0.030763733386993408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,4096,0.025448532899220784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,3072,0.020045866568883262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,2048,0.015015467007954916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,3584,0.022983467578887938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,1536,0.012806399663289388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,2560,0.017734400431315103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,1024,0.009900800387064616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,768,0.00860800047715505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,128,0.0057429333527882894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,256,0.006751999755700429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,512,0.00747626672188441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,65536,0.3513728141784668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,12288,0.06841279665629069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,10240,0.05784853299458822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,16384,0.08928106625874838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,7168,0.04129706621170044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,6144,0.03506773312886556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,8192,0.046989866097768146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,5120,0.029659734169642134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,4096,0.02443199952443441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,3584,0.022331732511520385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,3072,0.019478400548299156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,2560,0.01731200019518534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,2048,0.014493866761525472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,1536,0.012344533205032348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,1024,0.009628799557685853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,512,0.007223466535409291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,768,0.008358400066693623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,256,0.006428800026575724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,128,0.00557226687669754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,65536,0.34224640528361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,5120,0.5587882359822591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,6144,0.6863349278767903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,4096,0.4721674601236979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,8192,0.9043605168660482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,7168,0.7848906834920247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,10240,1.1142091115315755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,1536,0.21677014032999672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,12288,1.3370763142903646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,3584,0.41610240936279297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,3072,0.3656394640604655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,2048,0.2664384047190348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,2560,0.31261119842529295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,256,0.10541439851125081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,512,0.12295040289560955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,1024,0.16571839650472003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,768,0.1501845359802246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,128,0.09662933349609375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,16384,1.9205493927001953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,6144,0.17920640309651692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,5120,0.1520458698272705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,7168,0.2029205322265625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,12288,0.3445184071858724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,8192,0.2309877395629883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,10240,0.28462292353312174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,16384,0.4613674799601237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,4096,0.12450559933980306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,1024,0.04806293249130249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,1536,0.060887467861175534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,3584,0.11169280211130779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,512,0.035796264807383224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,768,0.04373226563135783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,2048,0.07276480197906494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,2560,0.08238293329874674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,256,0.030908799171447753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,3072,0.09661973317464193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,128,0.028835199276606244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,8192,0.17761386235555013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,10240,0.21868906021118165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,7168,0.15584425926208495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,12288,0.25865066846211754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,6144,0.13934186299641926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,16384,0.3562837282816569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,3072,0.07470613320668539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,2560,0.06449386676152548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,5120,0.11354026794433594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,4096,0.09539519945780436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,3584,0.08532799879709879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,512,0.028505599498748778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,1536,0.04753173192342122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,2048,0.05650666554768881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,1024,0.037444265683492024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,768,0.03399146795272827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,256,0.024165334304173787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,128,0.02241493264834086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,10240,0.18343040148417156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,12288,0.22522239685058593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,8192,0.1503925323486328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,7168,0.13359893163045247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,16384,0.3001994768778483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,6144,0.116211199760437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,3584,0.07278613249460855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,5120,0.09803520043691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,4096,0.08127360343933106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,3072,0.06397546529769897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,65536,1.84039789835612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,2560,0.0548960010210673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,65536,1.4051583607991538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,2048,0.04922133286794027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,1536,0.04086079994837443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,768,0.029146667321523028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,256,0.020669867595036827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,128,0.019399466117223103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,1024,0.03257386684417725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,512,0.024668800830841064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,8192,0.1260799964269002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,7168,0.10967679818471272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,6144,0.09592533111572266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,10240,0.15084586143493653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,12288,0.1827840010325114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,16384,0.24640960693359376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,4096,0.06750933329264322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,3584,0.06045973300933838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,5120,0.08097173372904459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,2560,0.045551999409993486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,1024,0.027029333511988322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,3072,0.05323520104090372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,768,0.024758400519688924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,2048,0.041034666697184245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,65536,1.142034149169922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,1536,0.03413120110829671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,128,0.01623679995536804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,512,0.020703999201456706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,256,0.01728746692339579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,6144,0.0961674690246582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,12288,0.18450239499409993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,10240,0.15619200070699055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,65536,0.9536906560262045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,7168,0.11243200302124023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,8192,0.12494933605194092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,16384,0.24977386792500816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,3072,0.05167359908421835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,4096,0.06631466547648111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,1536,0.029901866118113202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,5120,0.0800874630610148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,2560,0.04502293268839518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,3584,0.05853653351465861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,2048,0.03718719879786174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,256,0.01360106666882833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,128,0.012000000476837159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,512,0.01686506668726603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,768,0.020321067174275717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,1024,0.02444053292274475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,65536,1.0074527740478516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,16384,0.1893770694732666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,12288,0.1416042645772298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,6144,0.07564799785614014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,5120,0.06432746648788452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,8192,0.098963197072347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,7168,0.08575572967529296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,10240,0.12037440141042073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,1536,0.02741866707801819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,3584,0.048308265209197995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,1024,0.02177066604296366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,4096,0.05332373380661011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,3072,0.042813865343729655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,2560,0.03701119820276896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,2048,0.03271680076917012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,512,0.016577066977818807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,768,0.019833600521087645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,256,0.014009599884351095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,128,0.01320746640364329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,65536,0.7190090815226238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,12288,0.14550719261169434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,10240,0.12245972951253255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,6144,0.07426880200703939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,7168,0.08813973267873129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,8192,0.10273066361745198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,16384,0.19377387364705403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,3072,0.040541867415110275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,4096,0.05240533351898193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,3584,0.04848639965057373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,5120,0.06576213439305624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,1536,0.023987199862798056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,2560,0.035571201642354326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,2048,0.030558933814366657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,256,0.011029332876205444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,128,0.009810133775075277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,512,0.013399466872215271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,768,0.016357333461443583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,1024,0.01987839937210083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,65536,0.7563797632853191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,7168,0.06373546520868936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,12288,0.10364800294240314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,16384,0.1375509262084961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,8192,0.07175892988840739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,10240,0.08869973023732504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,6144,0.05564373334248861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,5120,0.04700800180435181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,3072,0.03168320059776306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,2560,0.02752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,4096,0.039604266484578446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,3584,0.03587520122528076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,2048,0.024434133370717367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,1536,0.020503467321395873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,1024,0.016429866353670754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,768,0.01493013302485148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,512,0.012910933295885722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,256,0.01097706655661265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,128,0.010373333096504211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,8192,0.07178239822387696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,12288,0.1072533369064331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,7168,0.06445333162943521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,10240,0.08960213661193847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,65536,0.5306026776631673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,16384,0.14106027285257977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,6144,0.05584746599197388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,5120,0.047275733947753903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,3072,0.030585600932439165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,1536,0.01867626706759135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,2560,0.027054933706919353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,3584,0.034953598181406656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,4096,0.03914453188578288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,2048,0.02276373306910197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,1024,0.015130666891733804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,768,0.012872533003489176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,512,0.011015466849009196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,128,0.008284799754619598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,256,0.009195733070373534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,65536,0.5548853556315104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,12288,0.09367679754892985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,10240,0.07897706826527914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,6144,0.04913493394851685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,16384,0.1242037296295166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,8192,0.06665813525517782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,7168,0.05736320018768311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,5120,0.04410560131072998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,4096,0.03511039813359578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,3072,0.02753173311551412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,3584,0.03210879961649577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,2560,0.024850134054819742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,2048,0.02153600056966146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,1536,0.01738026738166809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,1024,0.013848533233006796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,768,0.012353066603342693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,65536,0.47888959248860674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,512,0.010889599720637005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,256,0.009080533186594646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,128,0.008372267087300617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,16384,0.11564586957295735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,12288,0.08752106825510661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,6144,0.045211732387542725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,10240,0.07308053175608317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,7168,0.052239998181660974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,5120,0.039751466115315756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,8192,0.06102506717046102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,3072,0.0254805326461792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,4096,0.03213866750399272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,3584,0.030092799663543703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,2560,0.022817067305246987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,1536,0.01567039986451467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,1024,0.01309866706530253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,2048,0.019708800315856933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,65536,0.4377322514851888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,256,0.008346666892369587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,128,0.007551999886830647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,512,0.009981866677602131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,768,0.011334400375684102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,12288,0.0649183988571167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,7168,0.04046719868977865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,6144,0.03549760182698568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,16384,0.08724160194396972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,10240,0.05561920007069906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,8192,0.0460810661315918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,5120,0.030049065748850506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,4096,0.0254528005917867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,3584,0.023373866081237794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,768,0.010283733407656353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,3072,0.020875734090805054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,2560,0.018267732858657838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,1536,0.013240533073743186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,2048,0.015650133291880287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,1024,0.01090773344039917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,512,0.00920853316783905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,256,0.007799466451009114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,128,0.007283199826876323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,8192,0.04275306860605876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,65536,0.3224255879720052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,10240,0.05140053431193033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,16384,0.079965869585673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,12288,0.060064001878102624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,4096,0.023397332429885863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,6144,0.03216213385264079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,5120,0.028227200110753376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,7168,0.03700053294499715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,3584,0.021452800432840983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,3072,0.01890666683514913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,256,0.006896000107129414
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,2560,0.016665599743525186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,2048,0.01428053379058838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,1536,0.011944533387819926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,1024,0.009880533814430237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,768,0.00904960036277771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,512,0.008198399841785432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,65536,0.3043050765991211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,128,0.0064181332786877945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,10240,0.04297066529591878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,12288,0.050037332375844325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,8192,0.034859732786814375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,7168,0.030871466795603437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,16384,0.06577599843343099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,5120,0.02395520011583964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,6144,0.02718720038731893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,4096,0.020028799772262573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,3072,0.016214399536450704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,2048,0.012622933586438498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,1024,0.008966400225957235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,3584,0.018910932540893554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,2560,0.014891733725865683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,1536,0.01111893355846405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,65536,0.24932053883870445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,768,0.008409600456555684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,128,0.006348800162474315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,256,0.006751999755700429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,512,0.0077237332860628765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,12288,0.04922666549682617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,16384,0.06544320185979208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,10240,0.04252479871114095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,8192,0.03425920009613037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,6144,0.02647999922434489
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,7168,0.03081173300743103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,5120,0.02291413346926371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,4096,0.01941653291384379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,3072,0.015267200271288552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,3584,0.0173962672551473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,2560,0.013792000214258828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,1536,0.009969066580136616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,65536,0.2528010686238607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,2048,0.011756799618403117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,1024,0.008294400076071422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,768,0.007701333363850911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,512,0.006899199883143107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,256,0.005962666869163513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,128,0.005486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,8192,0.0316810667514801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,10240,0.03854399919509888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,12288,0.04541973272959392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,16384,0.06017280022303263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,7168,0.027830400069554645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,6144,0.024310400088628135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,5120,0.021074134111404418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,4096,0.01778986652692159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,3072,0.014398933450380961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,3584,0.016526933511098227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,1024,0.007919999957084655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,2560,0.013025066256523133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,2048,0.011096533139546711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,1536,0.009749333063761394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,65536,0.22685227394104004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,768,0.007523199915885926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,512,0.006870399912198384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,256,0.005936000247796377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,128,0.005539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,12288,0.043323731422424315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,16384,0.05757226546605428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,10240,0.03679680029551188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,8192,0.03030293385187785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,5120,0.020334933201471964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,6144,0.02336639960606893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,7168,0.02675093412399292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,4096,0.017065600554148356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,3584,0.015960533420244852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,3072,0.014025599757830302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,2560,0.012644267082214356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,2048,0.010621866583824158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,1024,0.007677866518497467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,1536,0.009380267063776652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,65536,0.21653332710266113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,768,0.007238399982452392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,128,0.005329066514968872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,512,0.006660266717274983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,256,0.0056981335083643595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,10240,0.0364959994951884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,16384,0.057810131708780924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,12288,0.04333440065383911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,7168,0.02677226662635803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,8192,0.02991466720898946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,5120,0.020410666863123574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,6144,0.0235317329565684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,3072,0.013939199844996133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,4096,0.017214934031168617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,3584,0.015920000274976094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,2560,0.012652800480524699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,2048,0.010553600390752156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,1536,0.009491200248400371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,1024,0.007714133461316426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,768,0.007272533575693766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,65536,0.21491732597351074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,512,0.006594133377075195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,128,0.005322666466236114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,256,0.005633066594600678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,5120,0.4451733271280925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,4096,0.358844788869222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,6144,0.5438528060913086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,7168,0.6085535685221355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,8192,0.7300501505533854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,10240,0.8836095809936524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,12288,1.0869845072428386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,2048,0.20349653561909994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,3584,0.33854719797770183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,3072,0.28246825536092124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,2560,0.24041706720987954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,1536,0.16681812604268392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,1024,0.1259114662806193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,256,0.07911146481831868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,128,0.07552639643351236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,16384,1.5535306294759115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,512,0.09288213253021241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,768,0.1154794692993164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,10240,0.2263989289601644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,7168,0.16120106379191082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,6144,0.13606185913085939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,8192,0.18030187288920085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,12288,0.26190826098124187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,5120,0.11360212961832683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,16384,0.3606357256571452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,2560,0.06294613281885783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,4096,0.09686079819997152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,2048,0.05592106580734253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,3584,0.08447786966959635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,3072,0.07470826307932535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,1536,0.047142398357391355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,1024,0.036968533198038736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,128,0.021886932849884033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,256,0.024024534225463866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,512,0.028008532524108887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,768,0.03325759967168172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,12288,0.23120106061299645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,6144,0.11569600105285645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,16384,0.30338347752889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,7168,0.13823359807332355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,8192,0.15883413950602215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,10240,0.1909898598988851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,65536,1.6178282419840495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,3072,0.06263253291447958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,5120,0.10200426578521729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,4096,0.07978560129801432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,1536,0.03570666710535685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,2560,0.053725866476694736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,2048,0.04505279858907064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,3584,0.07371306419372559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,512,0.019307732582092285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,256,0.015844266613324484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,768,0.023129600286483764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,128,0.013756799697875976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,1024,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,65536,1.2174208323160807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,16384,0.25852692921956383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,6144,0.10220906734466553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,7168,0.11625920136769612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,12288,0.19415253003438313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,10240,0.16739734013875324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,8192,0.14022293090820312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,5120,0.08839253584543863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,3072,0.053173331419626865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,1536,0.03158079981803894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,4096,0.06929492950439453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,2560,0.04677439928054809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,3584,0.0642250657081604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,2048,0.040251731872558594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,128,0.013076266646385193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,256,0.014813866217931113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,512,0.017961599429448447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,1024,0.02592533429463704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,768,0.02169493238131205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,65536,1.0235946655273438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,16384,0.19139092763264973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,7168,0.08365546862284343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,10240,0.11760853131612141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,6144,0.07286933263142904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,12288,0.13944320678710936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,8192,0.09813866615295411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,5120,0.061190398534139004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,4096,0.05218240022659302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,2560,0.035684267679850265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,3584,0.04626133441925049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,2048,0.031597866614659624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,3072,0.041707734266916915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,1536,0.02645333409309387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,1024,0.021458133061726888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,128,0.013118933637936911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,256,0.013573333621025085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,512,0.015990400314331056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,65536,0.759764289855957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,768,0.01930453379948934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,16384,0.18880853652954102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,12288,0.1398848056793213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,6144,0.07351786295572917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,10240,0.11844586531321208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,7168,0.08364799817403158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,8192,0.10112640062967937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,5120,0.06431573232014974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,3072,0.03938773473103841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,4096,0.05070293347040812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,3584,0.04618879954020182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,1536,0.023362133900324503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,2560,0.034986666838328045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,1024,0.01946559945742289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,2048,0.03009706735610962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,128,0.009691733121871948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,256,0.010819199681282043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,512,0.01297813355922699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,768,0.015923200050989787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,65536,0.7479082743326824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,12288,0.12182613213857015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,16384,0.16507627169291178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,6144,0.06333653529485067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,7168,0.07390613555908203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,10240,0.10277439753214519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,5120,0.055240531762441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,8192,0.08613333702087403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,4096,0.04466240008672078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,3072,0.03545173406600952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,3584,0.040915199120839435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,1536,0.02187946637471517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,1024,0.017805866400400796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,2560,0.031088000535964964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,2048,0.02695786754290263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,512,0.012446932991345723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,128,0.009845333298047383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,768,0.015129599968592325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,256,0.0108842670917511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,65536,0.6932640075683594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,12288,0.10733760197957357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,6144,0.055735464890797934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,16384,0.14244480133056642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,10240,0.09154986540476481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,7168,0.0643445332845052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,5120,0.04874666531880696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,8192,0.07606933116912842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,4096,0.03936426639556885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,3072,0.030896000067392987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,3584,0.0364522655804952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,1536,0.019285333156585694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,2560,0.027819732824961346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,1024,0.01609386702378591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,2048,0.023656533161799113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,768,0.01333440045515696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,65536,0.5506698608398437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,512,0.011158399780591329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,256,0.009639466802279156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,128,0.008727467060089112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,12288,0.08842453161875406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,16384,0.11724800268809002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,6144,0.04633599917093913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,7168,0.05327253341674805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,10240,0.07421546777089437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,8192,0.061145599683125815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,5120,0.04003200133641561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,4096,0.033089067538579306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,3072,0.026331732670466106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,3584,0.029735465844472248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,1536,0.016821332772572837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,2560,0.023310933510462442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,2048,0.020407466093699138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,1024,0.013901866475741067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,65536,0.474835205078125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,512,0.010115200281143188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,256,0.008987733721733093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,128,0.00810346653064092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,768,0.011762133240699768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,12288,0.08621013164520264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,16384,0.1137770652770996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,10240,0.07251839637756348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,6144,0.04474879900614421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,7168,0.05189973513285319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,8192,0.060711467266082765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,5120,0.03962560097376506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,4096,0.03210026621818542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,3072,0.025737599531809492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,3584,0.029585067431132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,1536,0.016516266266504924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,2560,0.022948267062505086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,2048,0.020118399461110433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,1024,0.013291733463605246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,65536,0.44523305892944337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,768,0.01162559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,512,0.010028800368309021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,256,0.0085098663965861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,128,0.00774186650911967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,12288,0.07691520055135091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,10240,0.06479359865188598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,16384,0.10641280015309651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,6144,0.04012586673100789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,7168,0.04718826611836751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,8192,0.05453333457310995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,5120,0.03578026692072551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,3072,0.02336853345235189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,4096,0.02911146680514018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,1536,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,3584,0.026738133033116656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,2560,0.02111039956410726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,2048,0.018336000045140584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,1024,0.012483200430870056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,65536,0.4369205474853516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,768,0.010811733206113179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,256,0.008122666676839193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,128,0.007331199944019318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,512,0.009270399808883667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,12288,0.06401066780090332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,16384,0.0849557320276896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,10240,0.0534495989481608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,6144,0.03408000071843465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,7168,0.03929280042648316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,8192,0.04572266737620036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,5120,0.030109866460164385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,4096,0.02445333401362101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,3072,0.01990506649017334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,3584,0.0229312002658844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,2560,0.017810134092966716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,65536,0.32037652333577477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,1536,0.01267093320687612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,2048,0.015471999843915304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,1024,0.010523733496665955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,768,0.009301333626111349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,256,0.0070592001080513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,512,0.007945600152015685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,128,0.006469333171844482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,12288,0.05414719978968302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,16384,0.0714186668395996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,10240,0.04615360101064046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,6144,0.02908693353335063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,7168,0.03392533461252849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,8192,0.038362665971120195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,5120,0.025305600961049397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,4096,0.02129813234011332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,3584,0.019486933946609497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,3072,0.01744640072186788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,1536,0.01120853324731191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,2560,0.01523413360118866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,65536,0.280569585164388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,2048,0.013392000397046407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,1024,0.009546666344006857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,768,0.008529067039489746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,128,0.006066133578618368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,512,0.007468800246715546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,256,0.006648533542950948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,12288,0.04924053351084391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,16384,0.06974399884541829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,10240,0.041220267613728837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,6144,0.02621440092722575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,8192,0.03504106601079305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,7168,0.03017599980036418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,5120,0.023461333910624185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,4096,0.019604265689849854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,3072,0.015809067090352378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,3584,0.018132267395655315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,65536,0.2753727912902832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,1536,0.010475732882817586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,2560,0.01409173309803009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,2048,0.012397866447766621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,1024,0.009036800265312195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,768,0.008158933122952778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,512,0.007112533350785573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,256,0.006287999947865804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,128,0.005817600091298421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,12288,0.04301439921061198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,16384,0.0602837324142456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,10240,0.03656853437423706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,6144,0.023434666792551677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,7168,0.026774400472640993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,8192,0.03170986572901408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,5120,0.02094506621360779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,4096,0.017401599884033205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,3584,0.01560533344745636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,3072,0.01386240025361379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,2560,0.012432000041007996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,1536,0.009291733304659527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,65536,0.24311253229777016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,2048,0.010709333419799804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,1024,0.00806826651096344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,768,0.007165866593519847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,256,0.005666133264700572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,512,0.006235733131567637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,128,0.0052714665730794275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,12288,0.041282133261362715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,16384,0.05548373460769653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,10240,0.035148799419403076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,6144,0.0225055992603302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,8192,0.030059732993443805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,7168,0.025975465774536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,5120,0.020361600319544475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,4096,0.016746666034062704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,3072,0.013379200299580892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,3584,0.015106133619944253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,65536,0.21840853691101075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,2560,0.012238933642705282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,1536,0.009048533439636231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,1024,0.008006399869918824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,2048,0.010593066612879436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,128,0.005297066768010458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,256,0.005673600236574808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,512,0.006162133316198985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,768,0.007045333087444305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,12288,0.03948053518931071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,10240,0.03337706724802653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,16384,0.05274986823399862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,8192,0.02804800073305766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,6144,0.02119893431663513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,7168,0.024753065903981526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,5120,0.01867306629816691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,4096,0.01575573285420736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,3072,0.012748799721399941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,3584,0.014385066429773965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,65536,0.20538880030314127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,1536,0.008648533622423809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,2560,0.011692800124486287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,2048,0.010045866171518963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,1024,0.007748266557852428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,768,0.006862933437029521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,256,0.005550933380921682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,512,0.006074666480223338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,128,0.005166933437188466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,16384,0.04570773442586263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,12288,0.03490026791890462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,10240,0.029269333680470782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,8192,0.024465066194534302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,7168,0.02178879976272583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,6144,0.01899306575457255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,5120,0.016597333550453185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,4096,0.014203733205795288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,3072,0.011845333377520244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,3584,0.013301333785057068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,65536,0.17249387105305988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,2048,0.00921066701412201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,1024,0.007247999807198842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,2560,0.011055999994277954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,1536,0.008438400427500407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,768,0.006551466882228851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,128,0.004840533435344696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,256,0.005253333350022634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,512,0.005715199808279673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,12288,0.034302934010823564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,10240,0.028885332743326823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,16384,0.044915199279785156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,8192,0.023941334088643393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,5120,0.016411733627319337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,7168,0.021463465690612794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,6144,0.01889280080795288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,4096,0.014030933380126953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,3072,0.011739733815193176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,3584,0.01311360001564026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,65536,0.1685098648071289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,2560,0.010838400324185688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,2048,0.00902826686700185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,1536,0.008425600330034892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,1024,0.007189333438873291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,768,0.006414933502674103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,512,0.005633066594600678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,128,0.004771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,256,0.0051701332132021586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,4096,0.2432192007700602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,5120,0.2957386652628581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,7168,0.4132543881734212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,6144,0.36259520848592125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,8192,0.4717301368713379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,10240,0.5968117396036784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,3072,0.191977596282959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,2048,0.13989653587341308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,3584,0.21941760381062828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,2560,0.15953599611918132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,1536,0.11277973651885986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,12288,0.711625607808431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,1024,0.08688746293385824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,128,0.0521450678507487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,256,0.054866135120391846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,768,0.07825067043304443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,512,0.06461973190307617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,16384,1.0028138478597006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,16384,0.2409429391225179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,6144,0.09434133370717367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,7168,0.10768427054087322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,12288,0.18241066932678224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,8192,0.12038400173187255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,10240,0.14719467163085936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,5120,0.08123626708984374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,2048,0.03880639870961507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,1536,0.03312000036239624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,1024,0.026603732506434125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,3584,0.05872213443120321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,2560,0.04440746704737346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,3072,0.05180266698201498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,4096,0.06633813381195068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,768,0.02362133264541626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,512,0.020096000035603842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,256,0.01725546717643738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,128,0.015783466895421348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,12288,0.1382570743560791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,10240,0.11185920238494873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,7168,0.08399360179901123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,6144,0.07227946917215983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,8192,0.09229546387990316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,16384,0.17896639506022136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,5120,0.060729598999023436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,2560,0.03459626833597819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,65536,1.0489749272664388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,2048,0.030742400884628297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,3584,0.04619199832280477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,4096,0.050057601928710935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,3072,0.040305066108703616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,256,0.013464533289273582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,128,0.01297813355922699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,512,0.016034133235613503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,1536,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,1024,0.021125332514444987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,768,0.019038933515548705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,65536,0.7540159861246745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,12288,0.1305418650309245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,10240,0.11148160298665363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,6144,0.06869119803110758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,16384,0.17872533798217774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,7168,0.07954773108164469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,8192,0.09295146465301514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,5120,0.06089599927266439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,3072,0.036958932876586914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,4096,0.047764265537261964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,2560,0.03333226641019185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,1536,0.02293333411216736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,3584,0.04320746660232544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,2048,0.027778132756551104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,1024,0.01936639944712321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,256,0.01125333309173584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,65536,0.7007712046305339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,512,0.013622400164604188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,128,0.010185600320498148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,768,0.016103466351826988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,5120,0.04193280140558879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,16384,0.12327040036519368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,6144,0.05068373282750448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,3584,0.032961066563924155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,7168,0.05650879939397176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,12288,0.09665813446044921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,8192,0.06593706607818603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,10240,0.0802997350692749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,4096,0.0358730673789978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,2560,0.02519039909044902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,3072,0.02893120050430298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,1536,0.01946880022684733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,1024,0.01609493295351664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,2048,0.022411733865737915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,768,0.014409599701563516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,256,0.010380799571673077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,512,0.011804800232251484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,128,0.00995306670665741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,8192,0.0676362673441569
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,10240,0.08121600151062011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,12288,0.09348159631093343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,7168,0.056858666737874354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,16384,0.12490986982981364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,65536,0.4888256072998047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,6144,0.04999786615371704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,5120,0.04380906820297241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,4096,0.03526933193206787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,3072,0.027360000212987262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,3584,0.03327893416086833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,1536,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,768,0.011636267105738323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,2560,0.02461973428726196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,2048,0.02169813315073649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,1024,0.014713600277900696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,512,0.009880533814430237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,256,0.008519466718037922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,128,0.007753600180149078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,10240,0.06871466636657715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,12288,0.08164052963256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,8192,0.05861866474151611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,16384,0.1087392012278239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,65536,0.5005599975585937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,6144,0.04363093376159668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,5120,0.03813226620356242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,7168,0.0497322678565979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,3072,0.024753065903981526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,4096,0.03127040068308513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,3584,0.029015467564264937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,1536,0.016285866498947144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,2560,0.02244053284327189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,2048,0.01932800014813741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,1024,0.013597866892814637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,768,0.011116799712181092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,512,0.00963200032711029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,256,0.008584533135096233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,128,0.007840000092983246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,65536,0.4553845405578613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,12288,0.07324906984965006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,16384,0.10027626355489094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,6144,0.0389792005221049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,10240,0.06293653249740601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,7168,0.04468799829483032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,8192,0.05279359817504883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,5120,0.03499946594238281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,3072,0.022282665967941283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,4096,0.02850666642189026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,1536,0.014855466286341348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,2560,0.020377600193023683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,3584,0.026663466294606523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,2048,0.017638399203618368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,65536,0.40045760472615555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,1024,0.012038399775822956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,512,0.00874133308728536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,128,0.007118933399518331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,768,0.00992746651172638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,256,0.007693866888682048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,16384,0.07008960247039794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,10240,0.04563519954681396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,12288,0.05383040110270182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,5120,0.025246934096018477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,7168,0.03344426552454631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,6144,0.029765333731969195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,8192,0.037759999434153244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,2560,0.015974400440851848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,3584,0.020477867126464842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,4096,0.021644800901412964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,2048,0.014281599720319112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,1536,0.012583466370900473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,65536,0.275386651357015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,3072,0.018155733744303383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,1024,0.010256000359853109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,128,0.0071733335653940845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,256,0.007320533196131389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,512,0.008055466910203297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,768,0.00904746651649475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,12288,0.05422933499018351
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,16384,0.07178346316019693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,10240,0.04603413343429565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,6144,0.02953280011812846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,7168,0.03347200155258179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,8192,0.039607465267181396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,5120,0.026632533470789595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,4096,0.021385600169499717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,3072,0.01725226640701294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,65536,0.2747125307718913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,3584,0.02034986615180969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,2560,0.015709867080052696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,1536,0.01164906620979309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,2048,0.013929599523544311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,1024,0.009505066275596618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,768,0.008075733482837678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,128,0.006040533383687338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,256,0.0064650664726893115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,512,0.0072405333320299785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,16384,0.06269973516464233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,12288,0.047592532634735105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,10240,0.04073493480682373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,6144,0.026332799593607587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,7168,0.029798400402069092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,8192,0.03511039813359578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,5120,0.02346453269322713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,3072,0.01585706671079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,4096,0.019433599710464478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,65536,0.25124905904134115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,3584,0.018501333395640054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,2560,0.01441493332386017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,1536,0.0105621337890625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,2048,0.012771200140317282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,1024,0.008970666925112407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,128,0.0059797331690788266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,256,0.006460799773534138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,512,0.007088000078996022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,768,0.007829333345095318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,12288,0.04386133352915446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,16384,0.058183467388153075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,10240,0.03792320092519124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,6144,0.024225066105524697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,7168,0.027852799495061236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,8192,0.03229119976361593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,5120,0.021638399362564086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,4096,0.017944532632827758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,65536,0.2235402743021647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,3072,0.014619732896486918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,3584,0.017386666933695474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,2560,0.013507200280825296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,1536,0.009825066725413004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,2048,0.011707733074824016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,1024,0.008414933085441589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,512,0.006673066814740498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,128,0.005539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,256,0.005977599819501241
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,768,0.007310933371384938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,12288,0.038677334785461426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,16384,0.050197335084279385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,10240,0.032738133271535234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,8192,0.029320534070332843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,7168,0.02427840034166972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,6144,0.021502933899561563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,5120,0.018718934059143065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,4096,0.016426666577657064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,65536,0.2289578596750895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,3072,0.013558399677276612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,3584,0.016053332885106405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,2048,0.010638933380444844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,1024,0.008065066734949748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,2560,0.012569600343704223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,1536,0.009859200318654377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,768,0.007225599884986877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,512,0.006818133095900218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,256,0.0062730665008227035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,128,0.005880533158779145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,12288,0.03672106663386027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,16384,0.04844266573588053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,10240,0.031396265824635824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,6144,0.020459733406702676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,7168,0.023563732703526817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,8192,0.027400533358256023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,5120,0.018628267447153728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,4096,0.015476266543070475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,65536,0.21676054000854492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,3584,0.014871467153231302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,3072,0.01232319970925649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,1536,0.008662399649620057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,2560,0.011222400267918905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,2048,0.00983679989973704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,1024,0.007548800110816956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,768,0.006524799764156342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,256,0.005487999816735586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,512,0.00597866674264272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,128,0.0052149335543314615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,12288,0.03214186628659566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,10240,0.02738453348477681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,16384,0.04208853244781494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,8192,0.02308373252550761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,6144,0.0182751993338267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,7168,0.02063573400179545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,5120,0.015794133146603904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,65536,0.2027402718861898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,4096,0.013554132978121438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,3072,0.01102293332417806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,3584,0.013329066832860312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,2048,0.008778666456540424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,2560,0.010680533448855082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,1024,0.007098666826883952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,1536,0.008482133348782856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,512,0.005897599955399831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,768,0.006331733365853627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,256,0.005458133419354757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,128,0.00514986664056778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,12288,0.03244799971580505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,16384,0.04352426528930664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,10240,0.02821333408355713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,7168,0.02129279971122742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,6144,0.018295466899871826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,8192,0.024334933360417685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,5120,0.016754132509231568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,65536,0.17131093343098958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,3584,0.013276799519856771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,3072,0.011031466722488403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,4096,0.013686399658521017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,1536,0.008096000055472057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,2560,0.010318932930628459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,2048,0.009045333663622538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,1024,0.007231999933719635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,768,0.006345599889755249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,256,0.0053162669142087305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,512,0.005849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,128,0.005026133358478546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,12288,0.02946026722590129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,16384,0.038424531618754074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,10240,0.025036799907684325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,8192,0.021132800976435342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,6144,0.016614400347073875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,7168,0.01886826753616333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,5120,0.014461867014567056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,65536,0.15202666918436686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,3584,0.012418133020401002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,4096,0.012382933497428894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,3072,0.010452266534169514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,2048,0.008402132987976074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,1536,0.008155733346939087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,2560,0.00990613301595052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,1024,0.0069578667481740315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,768,0.006090666850407918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,512,0.0056415999929110205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,128,0.004924799998601278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,256,0.005165866514046987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,12288,0.028288000822067262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,10240,0.024395734071731567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,16384,0.03763733307520549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,8192,0.02026453415552775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,7168,0.018438400824864705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,5120,0.014313600460688271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,6144,0.016265599926312765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,3072,0.010244266192118327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,4096,0.012193066875139873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,65536,0.14359893798828124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,3584,0.012134400010108948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,2560,0.009746133287747701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,2048,0.008258133133252462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,1536,0.007912533481915791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,768,0.005883733431498209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,1024,0.00678719977537791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,256,0.004996266464392344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,512,0.0055061335364977515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,128,0.004731733103593191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,16384,0.03671679894129436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,10240,0.02415999968846639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,12288,0.028126933177312213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,8192,0.02022506594657898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,7168,0.018347734212875368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,6144,0.016179200013478598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,5120,0.014218667149543762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,1536,0.007905066510041555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,4096,0.012171733379364013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,65536,0.13773759206136066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,3584,0.011940266688664753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,2560,0.009538132945696514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,3072,0.010102400183677673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,2048,0.008122666676839193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,1024,0.006785066425800323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,768,0.005838933090368906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,512,0.0054058666030565895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,256,0.004952533543109894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,128,0.004645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,4096,0.19170559247334798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,5120,0.22974507013956705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,6144,0.28201281229654945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,7168,0.3247392018636068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,8192,0.3551189422607422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,10240,0.43678614298502605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,3072,0.14762880007425944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,3584,0.17056320508321127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,2560,0.12536959648132323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,1536,0.0860970656077067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,2048,0.10902933279673259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,12288,0.5276192029317219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,256,0.04322239955266317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,1024,0.0669823964436849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,768,0.061256531874338785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,512,0.049770665168762204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,128,0.04078720013300578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,16384,0.7013013203938802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,10240,0.11445013682047527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,8192,0.09533759752909342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,12288,0.14064639409383137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,7168,0.08582399686177572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,6144,0.07196266651153564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,5120,0.06209493478139242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,16384,0.18108800252278645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,3584,0.04523093303044637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,4096,0.05022720098495483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,2048,0.03111039996147156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,512,0.016300800442695617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,3072,0.04072639942169189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,256,0.014055466651916504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,2560,0.03460693359375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,1536,0.02606933315594991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,1024,0.02089386582374573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,768,0.019319466749827065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,128,0.012738133470217386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,7168,0.07116693655649821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,12288,0.11956799825032552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,6144,0.060697599252065026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,10240,0.09835200309753418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,8192,0.08392000198364258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,16384,0.15242986679077147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,65536,0.740997314453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,3072,0.03333439826965332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,2560,0.02988800009091695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,5120,0.0541536013285319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,4096,0.04324906667073568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,3584,0.03908906777699788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,2048,0.02642986575762431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,128,0.009723732868830364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,1536,0.02132373253504435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,256,0.010683733224868774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,512,0.012935466567675271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,768,0.014851199587186179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,1024,0.017439999183019004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,65536,0.6264127731323242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,8192,0.07177706559499106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,7168,0.06056960026423136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,6144,0.052357331911722815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,10240,0.08637759685516358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,12288,0.10269973278045655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,16384,0.13152639865875243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,4096,0.03672106663386027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,2048,0.022992000977198283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,3072,0.02868693272272746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,2560,0.025778132677078246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,5120,0.04776000181833903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,3584,0.0339850664138794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,1536,0.01862186590830485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,768,0.013497599959373474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,1024,0.01548799971739451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,512,0.011127466956774395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,256,0.009413333733876546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,128,0.008666666348775227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,65536,0.5120031992594402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,12288,0.08349440097808838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,10240,0.0693183978398641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,6144,0.04350506862004598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,7168,0.0502239982287089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,16384,0.10670933723449708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,8192,0.05675093332926432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,3072,0.024493867158889772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,5120,0.0372927983601888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,1536,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,4096,0.03068266709645589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,3584,0.027935999631881713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,2560,0.021754666169484457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,2048,0.019691733519236247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,256,0.00879039963086446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,128,0.008113066852092742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,512,0.009967999656995137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,1024,0.013592533270517983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,65536,0.4203424135843913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,768,0.011553066968917846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,6144,0.04133546749750773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,7168,0.04784533182779948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,12288,0.07954986890157065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,16384,0.10247360070546467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,10240,0.06589333216349283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,8192,0.055240531762441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,5120,0.03643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,4096,0.0295413335164388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,3072,0.023752532402674355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,1536,0.015749333302179973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,2560,0.021571199099222817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,3584,0.027480532725652058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,2048,0.019220266739527384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,1024,0.012969600160916648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,65536,0.4062698682149251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,128,0.007720533510049183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,256,0.008425600330034892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,512,0.009726933638254802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,768,0.011315199732780456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,12288,0.06796800295511882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,10240,0.05960213343302408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,16384,0.09218560059865316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,6144,0.03688853184382121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,7168,0.041843199729919435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,8192,0.04918933312098185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,5120,0.03271786570549011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,3072,0.021254400412241616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,4096,0.026694399118423463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,3584,0.02472320000330607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,1536,0.01418773333231608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,1024,0.012045866250991822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,2560,0.01936960021654765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,2048,0.017454934120178223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,65536,0.3656671841939291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,768,0.01025279959042867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,512,0.009055999914805095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,256,0.007926400005817413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,128,0.007316266496976216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,10240,0.046862932046254475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,8192,0.03978453477223714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,7168,0.03387413422266643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,12288,0.054390398661295566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,16384,0.07328320344289144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,6144,0.029845333099365233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,5120,0.02714560031890869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,4096,0.021977599461873373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,3584,0.020820266008377074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,3072,0.017897599935531618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,2560,0.01593386630217234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,1536,0.012293333808581035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,2048,0.014738133549690247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,1024,0.010186666250228881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,768,0.008739200234413148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,512,0.007720533510049183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,256,0.006885333359241486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,128,0.006448000172773997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,65536,0.2792384147644043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,12288,0.045097601413726804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,16384,0.06072320143381754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,7168,0.028599466880162554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,8192,0.03267733256022136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,10240,0.038940799236297605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,6144,0.02518613338470459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,5120,0.022347732384999593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,1024,0.0086218665043513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,1536,0.010686933000882467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,4096,0.018557866414388023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,3584,0.01734613378842672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,3072,0.01546986699104309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,2560,0.013969066739082336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,2048,0.0129013329744339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,768,0.00788266658782959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,512,0.007178666690985362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,256,0.006458666423956554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,65536,0.23699092864990234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,128,0.006016000111897787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,12288,0.04416213432947795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,16384,0.057801600297292074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,8192,0.03221333424250285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,7168,0.027977599700291948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,10240,0.03719253142674764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,6144,0.02455893357594808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,5120,0.022137600183486938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,4096,0.01834133267402649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,3584,0.017541333039601644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,3072,0.015197867155075073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,2560,0.013745066523551942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,2048,0.012728533148765564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,1536,0.01027413308620453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,1024,0.00855466624101003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,768,0.007973333199818928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,512,0.007203199962774913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,65536,0.2212992032368978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,256,0.00631573349237442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,128,0.005884799857934316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,12288,0.039955198764801025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,16384,0.052469333012898765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,8192,0.029645866155624388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,10240,0.03418346643447876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,7168,0.02542080084482829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,6144,0.02221119999885559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,5120,0.020281600952148437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,3584,0.016004266341527303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,3072,0.014077867070833841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,4096,0.016809600591659545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,2560,0.012838400403658547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,1024,0.008111999928951263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,1536,0.009476266304651896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,2048,0.011563733220100403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,512,0.006841599941253662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,768,0.007549866537253062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,65536,0.21556266148885092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,256,0.006046933432420095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,128,0.005628799895445505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,12288,0.03503146568934123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,10240,0.02987946669260661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,16384,0.04521600008010864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,8192,0.024795732895533242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,7168,0.022284799814224245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,5120,0.018194133043289186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,6144,0.02008426586786906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,4096,0.015421866377194723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,65536,0.2010869344075521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,3072,0.01318933367729187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,3584,0.01520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,2048,0.010452266534169514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,2560,0.012161067128181458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,1024,0.007605333129564922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,1536,0.009377066294352214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,256,0.006198399762312571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,128,0.005832533538341522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,512,0.0067210664351781205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,768,0.0071722666422526045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,12288,0.03438400030136109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,10240,0.02945599953333537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,16384,0.04492799838383992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,6144,0.019350399573644005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,7168,0.021724800268809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,8192,0.025887999931971235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,5120,0.017774933576583864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,65536,0.19707093238830567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,4096,0.01477120021979014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,3072,0.012225066622098286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,3584,0.01407360037167867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,1536,0.008347733815511068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,2560,0.011241599917411804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,2048,0.009809066851933796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,1024,0.0070933332045873005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,128,0.005195733408133189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,768,0.006604800124963124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,256,0.005480533341566721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,512,0.0059562668204307554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,12288,0.03244373401006063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,16384,0.04326719840367635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,10240,0.02811093330383301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,8192,0.024621866146723428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,6144,0.018917334079742432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,7168,0.021257599194844566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,5120,0.017485866943995156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,65536,0.1623136043548584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,4096,0.014705066879590353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,3584,0.013140267133712769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,1536,0.008224000036716462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,2560,0.010428800185521444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,3072,0.011468799908955891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,2048,0.00955733358860016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,1024,0.007062399884064992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,768,0.00652266691128413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,512,0.005976533393065134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,256,0.005491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,128,0.005125333368778229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,12288,0.02991466720898946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,16384,0.039074134826660153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,10240,0.026204800605773924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,8192,0.02235306700070699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,6144,0.017595734198888144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,7168,0.019747199614842732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,5120,0.015733333428700765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,65536,0.15433707237243652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,4096,0.0128330667813619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,3584,0.012098133563995361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,3072,0.010601600011189777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,1536,0.0078015998005867004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,2560,0.009755733609199523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,2048,0.009050666292508443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,1024,0.006792533397674561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,768,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,128,0.005048533280690512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,512,0.005868799984455109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,256,0.00537066658337911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,16384,0.038636799653371176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,8192,0.022175999482472737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,12288,0.02916693290074666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,10240,0.02558079957962036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,6144,0.016476800044377647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,7168,0.018948266903559365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,5120,0.015399466951688132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,4096,0.012549333771069846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,3072,0.010446932911872864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,3584,0.012075733145078022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,65536,0.1464725335439046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,2560,0.009544533491134644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,512,0.005669333537419637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,2048,0.008881066242853801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,1536,0.007520000139872233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,1024,0.006571733454863231
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,768,0.00614933321873347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,256,0.005272533496220907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,128,0.004960000018278758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,8192,0.018573866287867228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,16384,0.03387306531270345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,10240,0.022677334149678548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,12288,0.026178133487701417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,7168,0.016798933347066246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,6144,0.015174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,5120,0.013351466258366904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,65536,0.12481173674265544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,4096,0.011326932907104492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,3584,0.01123306651910146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,3072,0.009754666686058044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,2560,0.009114666779836019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,2048,0.008170666793982189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,1536,0.007720533510049183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,1024,0.0063285330931345625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,768,0.005958400170008342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,256,0.0050911997755368555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,512,0.005481599768002828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,128,0.004827733337879181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,16384,0.03281919956207276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,10240,0.021934932470321654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,12288,0.025509333610534667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,6144,0.014923733472824097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,8192,0.01835306684176127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,7168,0.016548267006874083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,5120,0.013230933745702108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,65536,0.12208106517791747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,2048,0.008131200075149536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,4096,0.011248000462849935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,3072,0.009675733248392741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,3584,0.011099732915560405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,2560,0.00904960036277771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,768,0.005706666906674703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,1536,0.007461333274841308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,256,0.004946133494377137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,1024,0.006208000083764395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,512,0.005437866846720377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,128,0.004750933249791463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,8192,0.018294399976730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,7168,0.01646293302377065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,16384,0.03258879979451497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,12288,0.025260800123214723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,10240,0.02181866765022278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,65536,0.1218570629755656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,6144,0.01493013302485148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,5120,0.013245866696039835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,2048,0.00800960014263789
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,4096,0.011130666732788086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,3584,0.010852266351381938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,3072,0.009635200103123982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,2560,0.008897067109743754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,1536,0.0074879998962084455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,1024,0.006203733384609222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,768,0.00580266664425532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,512,0.005328000088532766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,256,0.004922666649023692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,128,0.0046293333172798155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,4096,0.13568746248881022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,5120,0.159661865234375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,6144,0.1897472063700358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,7168,0.2250879923502604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,8192,0.24808425903320314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,10240,0.30787626902262366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,2560,0.0868117332458496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,12288,0.3754485448201498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,3072,0.10302720069885254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,3584,0.12064747015635173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,2048,0.07645866870880128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,1536,0.06080853144327799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,768,0.0438101331392924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,512,0.03598613341649373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,1024,0.04835413297017415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,128,0.028922667105992634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,256,0.03091199994087219
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,8192,0.0663701335589091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,16384,0.48085654576619463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,6144,0.050381867090860996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,7168,0.05985386768976847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,10240,0.0825162649154663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,16384,0.12618772983551024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,12288,0.09468586444854736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,5120,0.04332480033238729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,1536,0.019432532787322997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,4096,0.0370901346206665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,3584,0.032893866300582886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,2560,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,1024,0.015914666652679443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,3072,0.029923200607299805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,2048,0.023018666108449302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,768,0.014504533012708029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,512,0.012604799866676331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,256,0.010700800021489461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,128,0.009839999675750732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,10240,0.06753280162811279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,12288,0.0807861328125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,6144,0.04306346575419108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,8192,0.056884264945983885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,7168,0.048951466878255204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,16384,0.10934293270111084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,2560,0.02226240038871765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,5120,0.03747413158416748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,3072,0.02439146637916565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,2048,0.019542400042215982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,3584,0.028226133187611895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,4096,0.030345600843429566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,256,0.008528000116348267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,1024,0.013646933436393737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,1536,0.015719466408093772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,768,0.01169706682364146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,512,0.01000853379567464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,128,0.007845333218574524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,65536,0.5077461242675781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,6144,0.038919464747111006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,7168,0.04418559869130452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,65536,0.4194762547810872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,12288,0.07172906398773193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,8192,0.052475734551747644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,10240,0.06012479861577352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,16384,0.09466666380564372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,1536,0.01418773333231608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,3072,0.021884800990422566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,2560,0.02009493311246236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,2048,0.017624533176422118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,4096,0.027229867378870648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,5120,0.03436053196589152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,3584,0.02586666742960612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,128,0.007101866602897644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,768,0.010435199737548828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,512,0.00862506628036499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,256,0.007673599819342296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,1024,0.012507733702659608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,65536,0.38075733184814453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,16384,0.06717013518015544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,7168,0.03224640091260274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,12288,0.0506495992342631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,6144,0.02815679907798767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,10240,0.04284266630808513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,8192,0.03605653444925944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,2560,0.015929599603017174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,4096,0.02112320065498352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,5120,0.02550613284111023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,3584,0.01962666710217794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,3072,0.01798400084177653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,1536,0.012389333049456278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,2048,0.014111999670664468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,128,0.007012266914049785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,1024,0.010570666193962098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,768,0.00918933351834615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,512,0.00790293316046397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,256,0.007328000168005626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,65536,0.2849034627278646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,8192,0.036500267187754315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,10240,0.04203413327534993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,12288,0.04994666576385498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,7168,0.03141226569811503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,6144,0.02686506708463033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,16384,0.06546239852905274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,5120,0.025026132663091023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,2048,0.013690666357676188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,4096,0.020010666052500407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,3584,0.018990933895111084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,3072,0.01667840083440145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,2560,0.01502826710542043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,1024,0.00965226689974467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,1536,0.011205333471298217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,768,0.008006399869918824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,512,0.007086933155854543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,256,0.006409599880377452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,128,0.006043733159701029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,65536,0.2606165409088135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,12288,0.04326826731363932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,16384,0.05800106525421143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,10240,0.03662613232930501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,8192,0.03169599970181783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,7168,0.027525333563486735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,5120,0.02177600065867106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,6144,0.023797333240509033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,2048,0.012680533528327941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,4096,0.017937066157658894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,3584,0.017257599035898845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,2560,0.013961600263913474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,3072,0.01535146633783976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,1024,0.008564266562461852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,1536,0.010611200332641601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,128,0.006039466460545858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,768,0.007640533149242401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,512,0.0068693334857622785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,256,0.006353066861629486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,10240,0.0333898663520813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,65536,0.22690134048461913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,7168,0.025308799743652344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,12288,0.04012906551361084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,8192,0.02871893246968587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,6144,0.021935999393463135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,16384,0.05160640080769857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,5120,0.020068265994389854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,1536,0.009707732995351156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,3584,0.015609600146611533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,4096,0.016376533110936484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,3072,0.013940266768137612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,2048,0.011724799871444702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,2560,0.012915199995040894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,1024,0.007809066772460937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,768,0.00717439999183019
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,512,0.006450133522351582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,256,0.0058677335580190025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,128,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,65536,0.19683413505554198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,16384,0.04225920041402181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,12288,0.03287146687507629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,10240,0.028223999341328937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,8192,0.024069333076477052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,7168,0.021644800901412964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,6144,0.019139200448989868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,5120,0.017089066902796428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,4096,0.014668800433476768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,3584,0.0144896000623703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,65536,0.20874560674031578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,3072,0.012852266430854797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,2048,0.010728533069292705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,2560,0.012196266651153564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,1024,0.007426133255163829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,1536,0.009484799702962239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,768,0.007021866738796234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,512,0.0066431999206542965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,128,0.005849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,256,0.00617386649052302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,12288,0.03325653274854024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,16384,0.04351360003153483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,10240,0.02887786626815796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,6144,0.01864746610323588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,7168,0.022167466084162393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,8192,0.024836266040802003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,5120,0.01746986707051595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,4096,0.014774399995803832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,3072,0.01259519954522451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,3584,0.013801599542299906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,65536,0.17415146827697753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,2560,0.01186240017414093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,1536,0.008384000261624653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,2048,0.010445866982142131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,1024,0.00727040022611618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,768,0.006647466619809468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,256,0.005737600227197012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,512,0.006117333471775055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,128,0.005369600156943003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,12288,0.03171839912732442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,10240,0.02778773307800293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,16384,0.04145493507385254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,6144,0.017964800198872886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,7168,0.020693333943684895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,8192,0.023825067281723022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,5120,0.01655893325805664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,4096,0.013896532853444419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,3072,0.01199893355369568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,3584,0.013453867038091025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,65536,0.16482133865356446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,2560,0.011081600189208984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,1536,0.0077237332860628765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,2048,0.009528533617655436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,1024,0.006854400038719177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,128,0.0052159999807675685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,256,0.0054517333706219995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,768,0.006348800162474315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,512,0.005830400188763936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,12288,0.03054506580034892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,10240,0.026382933060328167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,16384,0.039961600303649904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,6144,0.017443199952443443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,8192,0.022851200898488362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,7168,0.02062079906463623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,5120,0.016320000092188515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,4096,0.013645866513252258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,65536,0.1584544022878011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,3072,0.011586133639017742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,3584,0.013051733374595642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,2560,0.01070186694463094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,1536,0.007718400160471599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,2048,0.009366400043169658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,1024,0.006866133213043213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,768,0.0063381334145863845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,512,0.005864533285299936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,256,0.005500799914201101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,128,0.0051242664456367494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,10240,0.02299840052922567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,12288,0.0266485333442688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,16384,0.03433813254038493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,8192,0.0193066676457723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,7168,0.017876267433166504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,6144,0.01577173372109731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,5120,0.014064000050226847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,65536,0.14584959348042806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,4096,0.012201600273450216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,3072,0.010542933146158855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,3584,0.011769599715868632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,1024,0.0065098668138186145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,2048,0.008393599589665731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,768,0.006186666587988535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,2560,0.009783466657002766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,1536,0.007632000247637431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,512,0.005739733576774597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,256,0.0053727999329566956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,128,0.0050911997755368555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,12288,0.027401600281397504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,10240,0.023805866638819374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,16384,0.03660906553268432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,8192,0.020753065745035805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,6144,0.016302933295567833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,7168,0.018593066930770875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,5120,0.014940800269444785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,65536,0.1449610710144043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,4096,0.012089600165685017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,3072,0.010269866387049357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,1536,0.007165866593519847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,3584,0.011509333054224651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,2560,0.009554133812586466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,2048,0.008616532882054646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,1024,0.006513066589832306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,768,0.006121600170930227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,512,0.005681066711743673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,128,0.004990933338801066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,256,0.0052704001466433205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,12288,0.024127999941507973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,10240,0.020849066972732543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,16384,0.030447999636332195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,8192,0.017621332406997682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,7168,0.01614293356736501
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,6144,0.014366933703422546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,65536,0.12507839997609455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,5120,0.012423466642697651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,4096,0.010526933272679647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,3072,0.009399466713269551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,3584,0.010532266894976298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,2048,0.007866666714350382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,1024,0.006295466423034668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,1536,0.007218133409818013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,2560,0.009091200431187947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,768,0.005851733187834421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,512,0.0054837331175804135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,256,0.005092266698678335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,128,0.004863999783992767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,12288,0.023695999383926393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,16384,0.02993280092875163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,10240,0.02049493392308553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,8192,0.016948266824086507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,7168,0.015931733449300132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,5120,0.012105600039164225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,6144,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,65536,0.11317226886749268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,4096,0.010427733262379963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,3072,0.009222400188446046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,3584,0.01030293305714925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,2048,0.007742933432261149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,2560,0.008958933750788371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,1024,0.006105599800745646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,1536,0.007123200098673503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,768,0.005770666897296906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,512,0.005376000205675761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,128,0.00481386681397756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,256,0.004964266717433929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,8192,0.016618667046229045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,16384,0.02962133288383484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,12288,0.02328426639238993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,10240,0.019895466168721516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,65536,0.10727573235829671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,7168,0.015442132949829102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,6144,0.01341759959856669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,4096,0.010312533378601075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,5120,0.011962667107582092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,3584,0.0102101335922877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,3072,0.009192533294359843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,2560,0.00886400043964386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,768,0.005668266614278158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,2048,0.00761706680059433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,1536,0.007020799815654755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,1024,0.006146133442719777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,512,0.005233066777388254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,256,0.004923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,12288,0.022716800371805825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,128,0.004729599754015604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,16384,0.02908586661020915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,65536,0.10614079634348553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,10240,0.0195850670337677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,3584,0.01018453339735667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,8192,0.01641279955705007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,5120,0.012049067020416259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,7168,0.015319466590881348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,6144,0.013345066706339517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,4096,0.010272000233332317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,2560,0.008772266904513042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,768,0.005589333176612854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,512,0.00522986650466919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,3072,0.009143466750780743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,2048,0.007670400043328603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,1536,0.0069472000002861025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,1024,0.00602346658706665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,256,0.004856533308823904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,128,0.004624000191688538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,12288,0.022614399592081703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,8192,0.016448000073432924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,16384,0.02882560094197591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,10240,0.019612799088160195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,7168,0.015433599551518759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,6144,0.013332266608874002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,5120,0.011945600310961407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,65536,0.10604906876881917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,4096,0.010195199648539226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,3584,0.01002346674601237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,3072,0.009057066837946574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,2560,0.008657067020734151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,2048,0.0075882668296496075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,256,0.004796800017356872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,1024,0.00600853314002355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,1536,0.006971733272075653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,128,0.004588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,768,0.005551999807357788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,512,0.005165866514046987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,4096,0.16297279993693034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,6144,0.2385536034901937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,5120,0.1994122664133708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,7168,0.27907307942708337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,8192,0.32474025090535485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,3584,0.1428277333577474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,2560,0.10476586818695069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,10240,0.38557440439860025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,3072,0.12104746500651042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,1536,0.06355306704839071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,2048,0.08512000242869058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,12288,0.45102507273356124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,128,0.027505065997441607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,1024,0.04623039960861206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,256,0.029534933964411418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,512,0.034946131706237796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,768,0.04285333156585693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,16384,0.5809375762939453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,7168,0.07586879730224609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,12288,0.12493013540903727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,6144,0.06689919630686442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,8192,0.08766933282216391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,5120,0.05627626578013102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,10240,0.10464320182800294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,16384,0.1592960039774577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,1024,0.016544000307718913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,2560,0.031377067168553666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,4096,0.046435201168060304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,3584,0.0411082665125529
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,1536,0.020935465892155967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,3072,0.035957332452138266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,2048,0.02680533329645793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,256,0.010644267002741497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,128,0.009756799538930256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,512,0.012650666634241739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,768,0.014647466937700906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,10240,0.09170347054799398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,8192,0.07860480149586996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,6144,0.0595573345820109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,12288,0.1113813320795695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,7168,0.07042666276295981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,16384,0.14276480674743652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,5120,0.047474133968353274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,2560,0.02843093276023865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,3072,0.03288319905598958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,3584,0.03754986524581909
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,4096,0.04285973310470581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,2048,0.023944532871246337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,1536,0.018895999590555827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,768,0.012682666381200155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,1024,0.0141184002161026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,256,0.008665600419044494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,512,0.010072533289591472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,128,0.007804800073305767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,65536,0.6050944010416667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,8192,0.06985493501027426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,7168,0.06469546556472779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,10240,0.08668586413065592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,12288,0.10466667016347249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,16384,0.1309823989868164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,6144,0.055637331803639736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,5120,0.04559786717096965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,2048,0.022285866737365722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,65536,0.5283498764038086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,3072,0.030280532439549764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,4096,0.03962666591008504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,3584,0.035426131884257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,2560,0.027348266045252485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,512,0.009145599603652955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,1536,0.017629865805308023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,1024,0.013421866297721862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,768,0.011541333794593812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,128,0.007101866602897644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,256,0.007672533392906189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,7168,0.041621331373850504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,6144,0.03667519887288411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,16384,0.08943466345469156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,12288,0.06742506821950277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,8192,0.047175467014312744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,10240,0.05679359833399454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,65536,0.4912490526835124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,2048,0.016237866878509522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,5120,0.030696533123652142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,3584,0.024149332443873087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,3072,0.021333332856496176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,4096,0.026770132780075073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,2560,0.018588799238204955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,1536,0.013636266191800436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,128,0.006866133213043213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,768,0.010034132997194927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,1024,0.011230933666229247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,512,0.008358400066693623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,256,0.007423999905586243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,6144,0.034332799911499026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,16384,0.0868554671605428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,10240,0.053123199939727785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,65536,0.3217717488606771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,12288,0.06406293312708536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,7168,0.039768532911936445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,8192,0.04528106848398845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,5120,0.028700800736745198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,3584,0.022851200898488362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,2560,0.017514665921529136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,4096,0.025303467114766436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,1024,0.009989333152770997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,3072,0.02031573255856832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,256,0.0064298664530118305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,512,0.007467733323574066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,768,0.008805333574612936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,2048,0.015018666783968607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,1536,0.012475732962290447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,128,0.005931733548641205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,7168,0.03661333322525025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,10240,0.05082240104675293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,6144,0.032014934221903484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,8192,0.04282346566518148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,65536,0.30369599660237634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,12288,0.06005973418553671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,3584,0.021862399578094483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,16384,0.07675413290659586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,4096,0.024029866854349772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,2048,0.014509866635004679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,5120,0.02807360092798869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,1024,0.00957546631495158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,768,0.008228266735871632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,2560,0.016902399063110352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,3072,0.01909760038057963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,1536,0.012140799562136333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,512,0.0073173334201176955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,256,0.006468266745408376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,128,0.0059445331494013464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,12288,0.05700800021489462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,16384,0.07728959719340006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,65536,0.28514238993326824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,10240,0.0467306653658549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,6144,0.030637866258621214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,8192,0.04096320072809855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,7168,0.03455893198649089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,5120,0.02550293405850728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,3584,0.020963199933369956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,4096,0.022486400604248048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,3072,0.01798186699549357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,2560,0.01586560010910034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,2048,0.013337600231170654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,1536,0.01123413344224294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,768,0.007787733276685078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,1024,0.008840533097585042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,512,0.006786133348941803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,256,0.0060245335102081295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,65536,0.2720650672912598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,128,0.005559466779232025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,12288,0.05148693323135376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,7168,0.03269760012626648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,8192,0.036297599474589035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,10240,0.04323413372039795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,6144,0.02762346665064494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,16384,0.072597336769104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,5120,0.024127999941507973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,4096,0.02142933408419291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,3072,0.016698666413625083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,3584,0.019528534015019736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,2048,0.012788266936937968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,2560,0.014849066734313965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,1024,0.008578133583068848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,1536,0.010689066847165425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,768,0.0078015998005867004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,256,0.0062943999965985615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,512,0.0070592001080513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,65536,0.2434058666229248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,128,0.005916800101598104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,12288,0.051666132609049474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,16384,0.06587093273798625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,8192,0.036355201403299967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,7168,0.031514666477839154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,10240,0.044093867142995194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,3584,0.01861226757367452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,6144,0.027687466144561766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,4096,0.020618667205174766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,5120,0.023835732539494833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,2048,0.012401066223780314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,2560,0.014631467064221701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,768,0.007230933507283528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,3072,0.01644373337427775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,1536,0.009907199939092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,1024,0.008182399968306223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,256,0.0058335999647776285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,512,0.006493866443634033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,128,0.005366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,65536,0.2415679931640625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,16384,0.06488960186640422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,10240,0.041788800557454424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,8192,0.035411198933919266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,12288,0.04992213249206543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,7168,0.031335467100143434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,6144,0.026281599203745527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,5120,0.02299306591351827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,4096,0.019801600774129232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,3072,0.016114133596420287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,3584,0.018167465925216675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,2560,0.013967999815940857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,2048,0.01148373285929362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,1536,0.0095360000928243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,65536,0.23741226196289061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,1024,0.007701333363850911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,768,0.00693333347638448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,512,0.006286933521429698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,128,0.005163733164469401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,256,0.005613866448402405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,10240,0.04101973374684652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,12288,0.0485152006149292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,16384,0.06587946812311808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,8192,0.03415573438008626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,7168,0.030364799499511718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,6144,0.025887999931971235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,5120,0.02174933354059855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,4096,0.019211733341217042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,3584,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,3072,0.01520853340625763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,65536,0.22857173283894858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,2560,0.013718400398890176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,2048,0.011485866705576579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,1536,0.009361066420873006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,1024,0.007793066898981731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,768,0.006871466835339864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,512,0.0061930666367212926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,256,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,128,0.0051807999610900875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,16384,0.05454399983088175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,12288,0.044912000497182206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,10240,0.037674665451049805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,6144,0.024482132991154988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,8192,0.03197973370552063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,5120,0.02081386645634969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,7168,0.028318933645884198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,4096,0.01874879995981852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,2048,0.010662399729092916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,65536,0.2080320040384928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,3584,0.01686826745669047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,3072,0.014703999956448874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,768,0.006870399912198384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,2560,0.01263146698474884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,256,0.00561706672112147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,1536,0.009274666508038838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,1024,0.007436800003051758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,512,0.0061706667145093284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,128,0.00517439991235733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,12288,0.043586134910583496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,16384,0.05841066837310791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,8192,0.030263467629750566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,10240,0.03663253386815389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,7168,0.027577600876490277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,65536,0.20363200505574547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,6144,0.02387626568476359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,5120,0.020175999402999877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,3584,0.015999999642372132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,4096,0.01799466609954834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,3072,0.014459733168284097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,2560,0.012321066856384278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,2048,0.010419199864069622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,512,0.005990399916966756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,1536,0.008846933643023174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,1024,0.0072970668474833175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,128,0.005051733553409576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,768,0.00658240020275116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,16384,0.052496000130971276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,256,0.005307733515898387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,12288,0.04040746688842774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,10240,0.03299840092658997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,7168,0.026024534304936724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,8192,0.02908266584078471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,6144,0.021896533171335855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,5120,0.019038933515548705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,4096,0.016673066218694053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,65536,0.19497706095377604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,3584,0.01527679959932963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,512,0.0058442667126655575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,3072,0.013480533162752786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,2560,0.011682132879892986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,1536,0.008589866757392883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,2048,0.010054399569829304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,768,0.006444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,1024,0.007181866466999054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,256,0.005309866865475973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,128,0.0050240000089009605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,16384,0.04283413489659627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,12288,0.03345919847488403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,10240,0.028281599283218384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,8192,0.023858133951822916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,7168,0.020692267020543418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,4096,0.013990400234858194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,3584,0.012794666488965354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,6144,0.018230400482813516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,5120,0.015337600310643514
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,65536,0.1550528049468994
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,768,0.006046933432420095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,256,0.00510506679614385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,3072,0.011223466197649638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,2560,0.009989333152770997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,1536,0.007875200112660725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,2048,0.00865600009759267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,1024,0.006492800017197926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,512,0.005603200197219849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,16384,0.03155306577682495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,128,0.004878933231035868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,8192,0.017386666933695474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,12288,0.024589866399765015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,7168,0.01532799998919169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,6144,0.01421119968096415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,4096,0.010956799983978272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,10240,0.02114560008049011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,2048,0.007366399963696797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,65536,0.11929173469543457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,5120,0.011929600437482198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,3072,0.009212799866994222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,3584,0.010337066650390626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,2560,0.008546132842699687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,1536,0.0070165331164995836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,512,0.005309866865475973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,256,0.004958933095137278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,1024,0.006137600044409434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,768,0.005644799768924713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,16384,0.026958932479222614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,7168,0.01432213286558787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,10240,0.018523732821146645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,128,0.004795733094215393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,12288,0.021542400121688843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,3072,0.008771199981371562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,6144,0.01285546620686849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,8192,0.01546346644560496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,65536,0.09810026486714682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,5120,0.011320533355077107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,2560,0.008211199939250947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,4096,0.01016960044701894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,3584,0.010034132997194927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,2048,0.007166933516661327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,1024,0.005932799975077311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,128,0.0046741331617037455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,1536,0.00689386675755183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,768,0.00547733356555303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,512,0.0052138666311899815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,256,0.004862933357556661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,10240,0.018613332509994508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,8192,0.015527466932932535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,65536,0.09770239988962809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,16384,0.02690453330675761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,12288,0.021350399653116862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,6144,0.012807466586430869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,4096,0.01011306643486023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,7168,0.014292266964912415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,1024,0.0060149331887563075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,5120,0.011400533715883891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,3584,0.009920000036557516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,512,0.005147733290990194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,3072,0.00867733359336853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,1536,0.006870399912198384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,2560,0.008100266754627227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,256,0.00486826648314794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,2048,0.007030400137106578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,768,0.005435733497142792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,128,0.004626133541266123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,6144,0.27485974629720056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,4096,0.18254186312357584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,5120,0.23423253695170082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,7168,0.33472960789998374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,8192,0.37438828150431314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,10240,0.4327626546223958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,2048,0.09647040367126465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,1536,0.07232106526692708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,3584,0.16148160298665365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,3072,0.14211840629577638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,12288,0.5003861427307129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,256,0.029268266757329305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,768,0.04280106623967488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,2560,0.11804373264312744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,1024,0.050115199883778896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,128,0.027051732937494917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,512,0.03427306811014812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,16384,0.6187509536743164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,6144,0.07452373504638672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,7168,0.08712853590647379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,10240,0.11609386603037516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,12288,0.13825279871622723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,8192,0.09282240072886148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,16384,0.1788383960723877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,5120,0.059453864892323814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,2560,0.035599998633066815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,3584,0.04677120049794515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,2048,0.02839360038439433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,3072,0.04086506764094035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,4096,0.052407467365264894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,1536,0.02259733279546102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,512,0.012932266791661581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,256,0.010732799768447876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,768,0.014863999684651694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,1024,0.01713599960009257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,128,0.009842133522033692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,10240,0.10272640387217205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,7168,0.0792842706044515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,16384,0.15983573595682782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,8192,0.09028159777323405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,12288,0.11857813199361164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,6144,0.06681599617004394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,4096,0.046589867273966475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,5120,0.05759146610895792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,3072,0.0368778665860494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,2048,0.026254934072494508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,3584,0.04225493272145589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,768,0.01360426644484202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,2560,0.03178026676177979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,1024,0.01560640037059784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,1536,0.020732800165812172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,512,0.010339200496673584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,128,0.007935999830563863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,256,0.008687999844551087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,65536,0.6430933634440105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,10240,0.09915520350138346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,12288,0.11154346466064453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,16384,0.1518719991048177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,8192,0.08660906950632731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,7168,0.07499093214670817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,6144,0.0632426659266154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,2560,0.030133332808812457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,65536,0.5710314432779948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,5120,0.05125759840011597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,3072,0.03494826555252075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,4096,0.044410665829976395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,3584,0.03969493309656779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,2048,0.02468693256378174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,1536,0.018977065881093345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,768,0.012035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,1024,0.014509866635004679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,512,0.009898666540781658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,256,0.007701333363850911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,128,0.007132799923419952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,7168,0.04585173527399699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,8192,0.05256426731745402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,16384,0.09693333307902018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,65536,0.5568885167439779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,10240,0.062388265132904054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,6144,0.039640533924102786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,12288,0.07420159975687662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,4096,0.029652265707651775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,5120,0.0337994654973348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,3584,0.02626986702283223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,3072,0.023177599906921385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,2560,0.02027946710586548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,1024,0.011659733454386393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,2048,0.01726186672846476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,768,0.009820800026257832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,512,0.008538666367530822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,256,0.007447466750939687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,128,0.006910933554172516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,1536,0.014486400286356607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,6144,0.037393065293629964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,16384,0.09226986567179361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,12288,0.06623146533966065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,8192,0.04885653257369995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,7168,0.04287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,10240,0.0583189328511556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,4096,0.027419734001159667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,65536,0.34270401000976564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,3072,0.021731199820836385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,3584,0.02474986712137858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,2560,0.018739199638366698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,2048,0.016037333011627197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,768,0.00864746669928233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,1536,0.01297706663608551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,5120,0.0313482662041982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,1024,0.00997226635615031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,512,0.0076000000039736434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,256,0.006515199939409892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,128,0.00591786652803421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,12288,0.0652127981185913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,16384,0.08834559917449951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,65536,0.33653761545817057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,5120,0.02969493269920349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,4096,0.02581760088602702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,8192,0.04642026821772258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,10240,0.05653333266576131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,7168,0.040593067804972335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,6144,0.034227200349171955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,3584,0.02453119953473409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,2048,0.015513599912325541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,3072,0.020717867215474448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,1024,0.010101333260536194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,2560,0.018150399128595986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,1536,0.012665599584579468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,768,0.008422399560610454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,512,0.007548800110816956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,256,0.006569600105285645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,128,0.005966933568318685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,65536,0.3312469482421875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,5120,0.029062400261561077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,6144,0.03425600131352742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,7168,0.039678935209910074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,16384,0.08390613396962485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,8192,0.045311999320983884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,12288,0.062163201967875156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,10240,0.05276373227437338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,4096,0.024754132827123007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,3072,0.019738666216532388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,2560,0.017223467429478966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,1024,0.009307733178138733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,3584,0.02267199953397115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,2048,0.014910933375358582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,1536,0.011868799726168316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,128,0.005566933254400889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,768,0.008042666812737782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,512,0.006969599922498067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,256,0.006073600053787232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,65536,0.29692907333374025
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,10240,0.04666453202565511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,7168,0.034783999125162765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,8192,0.03771946827570598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,16384,0.07057600021362305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,12288,0.055693864822387695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,6144,0.029993599653244017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,5120,0.02600533366203308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,2048,0.01362773378690084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,4096,0.02223893404006958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,3072,0.018053332964579262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,3584,0.020921599864959717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,2560,0.016160000363985697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,1536,0.0113237331310908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,1024,0.008835200468699138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,256,0.006324266890684764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,128,0.005961599946022034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,65536,0.26681814193725584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,512,0.007233066856861115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,768,0.007952000200748443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,8192,0.04097599983215332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,16384,0.07741226355234782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,10240,0.04639573494593303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,12288,0.058093865712483726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,3584,0.020768000682195028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,7168,0.036372268199920656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,6144,0.030191999673843384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,5120,0.027406932910283406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,3072,0.018388267358144125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,4096,0.023100799322128295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,2560,0.016210132837295534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,128,0.0054282665252685545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,2048,0.013245866696039835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,1536,0.010851200421651204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,1024,0.00860800047715505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,768,0.007462400197982788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,512,0.006503466765085857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,65536,0.2667370796203613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,256,0.005852800110975901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,7168,0.03539946476618449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,10240,0.04833706617355347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,8192,0.03975253502527873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,6144,0.03099199930826823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,5120,0.025831466913223265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,12288,0.05698239803314209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,16384,0.07526933352152507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,4096,0.02239039937655131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,1536,0.010492799679438274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,2048,0.013106133540471396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,768,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,65536,0.25267093976338706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,2560,0.015532799561818442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,3072,0.017917867501576742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,3584,0.020381865898768108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,1024,0.008298666775226593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,256,0.005672533313433329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,512,0.006388266881306966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,128,0.005188266436258951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,10240,0.04738346735636394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,12288,0.055742931365966794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,4096,0.022318933407465616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,8192,0.0394976019859314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,16384,0.06626026630401612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,7168,0.03505920171737671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,6144,0.0302239994208018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,5120,0.025333333015441894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,3584,0.020119466384251914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,2048,0.012647466858228049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,2560,0.01532373329003652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,3072,0.017621332406997682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,65536,0.24460906982421876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,1024,0.008208000163237254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,768,0.007112533350785573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,512,0.006279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,1536,0.01035520037015279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,256,0.0056320001681645715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,128,0.0052704001466433205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,12288,0.05073173443476359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,10240,0.04179519812266032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,8192,0.03457066615422567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,16384,0.06636159817377726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,6144,0.02759360074996948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,7168,0.03158079981803894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,5120,0.023707733551661173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,3072,0.0160970667997996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,1536,0.01007466713587443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,2560,0.01392213304837545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,65536,0.22467306454976402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,4096,0.02034239967664083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,3584,0.01842986742655436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,512,0.006293333570162455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,2048,0.011749333143234253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,1024,0.007742933432261149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,768,0.007062399884064992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,256,0.005578666428724925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,128,0.0052138666311899815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,16384,0.06813226540883383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,12288,0.051957333087921144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,10240,0.044753066698710126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,8192,0.03749866485595703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,6144,0.029433600107828778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,7168,0.03189226587613424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,5120,0.024948267141977946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,65536,0.2350719928741455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,4096,0.02049386699994405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,3584,0.018579200903574625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,3072,0.01656426688035329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,1536,0.009595732887585957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,1024,0.007845333218574524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,2560,0.014375467101732889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,2048,0.011866666873296102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,768,0.006740266581376393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,512,0.006016000111897787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,256,0.005436799923578898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,128,0.005077333251635233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,10240,0.04012906551361084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,16384,0.061129601796468105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,6144,0.025935999552408856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,12288,0.047761066754659014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,4096,0.018863999843597413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,7168,0.029475200176239013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,8192,0.034082134564717606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,5120,0.02193173368771871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,65536,0.2011712074279785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,1024,0.007503999769687653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,3072,0.01477013329664866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,3584,0.01728106737136841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,2560,0.013240533073743186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,2048,0.011172266801198323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,1536,0.009450667103131612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,256,0.005348266661167144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,768,0.006771199901898702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,512,0.005921066800753275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,16384,0.04838933149973552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,128,0.004960000018278758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,4096,0.016134400169054666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,12288,0.03721919854482015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,10240,0.032686932881673174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,8192,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,7168,0.024486400683720908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,6144,0.021149865786234536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,5120,0.018729599316914876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,65536,0.18185173670450847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,2048,0.00962773362795512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,3072,0.01232319970925649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,3584,0.014612266421318054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,2560,0.011502933502197266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,1536,0.008270933230717977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,1024,0.00674773355325063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,512,0.0056650668382644655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,768,0.0061749334136645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,16384,0.03620586792627971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,256,0.005177600185076395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,128,0.004835199813048045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,8192,0.021220266819000244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,6144,0.015003732840220132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,7168,0.017193599541982015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,5120,0.013402666648228964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,12288,0.029002666473388672
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,10240,0.02418880065282186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,4096,0.011896533767382304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,3584,0.010782933235168457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,65536,0.13460373878479004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,2560,0.008941866954167684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,1536,0.007073066631952922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,3072,0.00958079993724823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,768,0.005678933362166087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,128,0.0048096001148223875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,256,0.004971733192602793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,2048,0.007919999957084655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,1024,0.006132266422112783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,512,0.005274666845798493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,16384,0.02613760034243266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,8192,0.015166933337847391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,5120,0.011333333452542622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,4096,0.010082133611043294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,12288,0.02073919971783956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,10240,0.01791680057843526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,65536,0.09854613145192465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,7168,0.013740799824396768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,6144,0.012354133526484172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,3584,0.00972266693909963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,2048,0.007063466807206471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,1024,0.0058559998869895935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,1536,0.006780800223350525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,768,0.005465599894523621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,512,0.005163733164469401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,3072,0.008404266834259034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,2560,0.00829013337691625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,256,0.004826666911443075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,10240,0.017866667111714682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,7168,0.01381226678689321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,65536,0.09452160199483237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,128,0.004633600016434988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,16384,0.026044799884160356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,12288,0.020716800292332967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,4096,0.010010666648546855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,8192,0.01513706644376119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,5120,0.01125973363717397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,6144,0.012396799524625142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,3072,0.008306133250395458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,2560,0.008053333560625712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,512,0.005067733426888784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,768,0.0053941334287325535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,3584,0.009738666812578838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,1536,0.006811733543872833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,2048,0.007014399766921997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,1024,0.005922133227189382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,128,0.004670933385690053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,256,0.004770133395989736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,4096,0.08265706698099771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,5120,0.09981653690338135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,6144,0.11769493420918782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,7168,0.13201920191446942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,8192,0.15002986590067546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,10240,0.1828213373819987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,3584,0.07368000348409018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,2560,0.05645653406778971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,3072,0.0646997332572937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,2048,0.049149866898854574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,1024,0.03161279956499736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,12288,0.21691625912984214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,256,0.018242132663726807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,128,0.016687999169031777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,768,0.02884693344434102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,1536,0.040487468242645264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,512,0.021600000063578286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,16384,0.2816469192504883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,7168,0.03857920169830322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,8192,0.04301226536432902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,10240,0.05128959814707438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,5120,0.031889067093531294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,16384,0.08066240151723227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,6144,0.034467200438181564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,12288,0.06022613445917765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,4096,0.02569813330968221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,3072,0.021149865786234536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,1536,0.0136543999115626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,768,0.010631466905275982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,3584,0.023534933725992836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,512,0.008873599767684936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,2560,0.018812799453735353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,2048,0.016713599363962807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,1024,0.011454932888348897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,128,0.006986666719118755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,256,0.007381333410739899
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,16384,0.06275519927342733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,8192,0.03441919883092244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,7168,0.030997333923975627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,6144,0.027693865696589152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,10240,0.04139413436253865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,12288,0.04752746820449829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,3584,0.019059199094772338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,2048,0.013168000181516013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,2560,0.015058133006095886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,3072,0.016706132888793947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,4096,0.020627200603485107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,5120,0.024362667401631673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,1024,0.009693866968154908
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,1536,0.011490133404731751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,512,0.007046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,768,0.008694400389989216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,256,0.006363733112812043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,128,0.005982933441797892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,65536,0.29302186965942384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,12288,0.042004267374674484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,16384,0.05547626813252767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,7168,0.02718293269475301
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,10240,0.0363317330678304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,8192,0.030408533414204915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,65536,0.24171306292215983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,6144,0.025764266649882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,5120,0.02159573237101237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,3072,0.014564266800880432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,4096,0.018116267522176106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,2560,0.013199999928474426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,3584,0.016571733355522155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,1536,0.010321066776911417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,2048,0.012497066458066305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,1024,0.008943999807039898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,512,0.006393600006898243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,768,0.0077354664603869125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,256,0.005914666752020518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,128,0.005524266759554545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,65536,0.21695359547932944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,16384,0.04718826611836751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,12288,0.03525333404541016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,8192,0.025846399863560993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,7168,0.02334400018056234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,10240,0.030857600768407184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,6144,0.021285333236058555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,5120,0.019592533508936562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,4096,0.01603626708189646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,3072,0.013235200444857279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,2048,0.011010133226712545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,3584,0.01490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,2560,0.01222933332125346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,1536,0.010172800223032633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,1024,0.008039466540018718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,65536,0.18670186996459961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,256,0.006154666841030121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,128,0.005832533538341522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,768,0.007012266914049785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,512,0.006541866560777028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,12288,0.03399680058161418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,16384,0.04405653476715088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,10240,0.030459733804066975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,6144,0.02016106645266215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,7168,0.022513065735499063
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,8192,0.02504533330599467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,5120,0.017832533518473307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,4096,0.01530026694138845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,3584,0.013718400398890176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,3072,0.012401066223780314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,2560,0.011597866813341778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,1536,0.009070932865142822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,1024,0.007393066585063934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,2048,0.010385066270828247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,65536,0.16932160059611004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,768,0.006559999783833821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,256,0.005752533177534739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,512,0.006090666850407918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,128,0.0054506664474805195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,12288,0.0319541335105896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,16384,0.04103680054346721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,10240,0.027526400486628216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,7168,0.02118933399518331
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,6144,0.018569600582122803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,8192,0.0235317329565684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,5120,0.01669973333676656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,4096,0.014174933234850565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,3072,0.011497599879900615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,3584,0.012941867113113403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,1536,0.008572799960772197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,2560,0.010818133751551311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,2048,0.009729066491127014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,1024,0.0068234667181968685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,65536,0.1607946713765462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,128,0.005137066543102265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,768,0.0061930666367212926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,256,0.005362133185068766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,512,0.005830400188763936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,12288,0.02951040069262187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,10240,0.026065067450205488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,16384,0.03854933182398478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,6144,0.01729173262914022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,8192,0.022184532880783082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,5120,0.01578133304913839
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,7168,0.020004266500473024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,4096,0.013597866892814637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,3072,0.011130666732788086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,3584,0.012618666887283326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,65536,0.14669973055521648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,2560,0.010501333077748616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,1536,0.0077344000339508055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,2048,0.009523199995358785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,1024,0.006681600213050842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,768,0.006243200103441874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,256,0.005512533088525137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,512,0.005798399945100148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,128,0.005261866748332978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,12288,0.024868265787760416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,10240,0.021826134125391642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,16384,0.032339199384053545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,7168,0.016760534048080443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,8192,0.018403200308481853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,6144,0.015194666385650635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,5120,0.013610666990280152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,4096,0.0121781329313914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,65536,0.12463146845499676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,3072,0.010156800349553425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,3584,0.011621333161989848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,1024,0.006218666831652323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,2048,0.008227199812730153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,2560,0.009854933619499207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,1536,0.007533866663773854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,768,0.0060127998391787205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,256,0.005335466563701629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,512,0.0056991999348004665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,128,0.005058133105436961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,12288,0.02432639996210734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,10240,0.021016534169514975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,16384,0.03094080090522766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,8192,0.017692800362904867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,6144,0.014816000064214071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,7168,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,5120,0.013501866658528646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,65536,0.11611093680063884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,4096,0.011975466211636861
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,3072,0.010007466872533162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,3584,0.011500799655914306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,2560,0.009609599908192951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,2048,0.008160000046094257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,1024,0.006189866860707601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,1536,0.007256533205509186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,768,0.00592853327592214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,256,0.005313066641489664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,512,0.005580799778302511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,128,0.005046399931112925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,10240,0.022080000241597494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,12288,0.02577280004819234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,16384,0.03357013463973999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,6144,0.015546666582425437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,8192,0.01904319922129313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,7168,0.016976000865300496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,5120,0.014261333147684732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,65536,0.12419626712799073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,4096,0.012359467148780823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,3072,0.009860266248385112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,2560,0.00934933324654897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,3584,0.011495467027028401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,1536,0.006939733525117238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,2048,0.007924266656239827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,1024,0.006341333190600078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,768,0.005932799975077311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,512,0.005545599758625031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,128,0.005010133484999338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,256,0.005258666475613912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,12288,0.024498132864634196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,10240,0.02148159941037496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,16384,0.032517333825429276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,8192,0.018617600202560425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,6144,0.01520639955997467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,7168,0.016839466492335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,5120,0.013949867089589438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,65536,0.1193013350168864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,4096,0.012156800429026286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,3072,0.00942186713218689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,3584,0.011124266187349956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,2560,0.008708266417185466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,1536,0.006916266679763794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,2048,0.007952000200748443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,512,0.005578666428724925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,1024,0.006331733365853627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,768,0.005826133489608765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,256,0.005153066913286845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,128,0.0049333333969116214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,10240,0.018780799706776936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,12288,0.02147946755091349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,16384,0.027369600534439088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,8192,0.015824000040690102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,7168,0.014669866363207499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,6144,0.013552000125249227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,5120,0.012226133545239767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,65536,0.10579626560211182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,4096,0.010476799805959065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,3072,0.008605866630872091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,3584,0.010382933417956035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,2048,0.0072053333123524976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,1024,0.006054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,1536,0.00690773328145345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,2560,0.008272000153859456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,768,0.005689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,512,0.005376000205675761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,256,0.005077333251635233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,128,0.004935466746489207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,8192,0.015650133291880287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,12288,0.020994132757186888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,10240,0.018484266599019368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,16384,0.026510934034983318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,7168,0.014526933431625366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,65536,0.09448959827423095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,6144,0.013299199938774108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,5120,0.01164479951063792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,4096,0.009980799754460652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,3072,0.008338133494059246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,3584,0.009527466694513957
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,2560,0.008154666423797608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,2048,0.007073066631952922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,1536,0.006856533388296763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,768,0.005606399973233541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,1024,0.0059125334024429325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,512,0.005277866621812185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,128,0.004769066472848257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,256,0.0049792001644770306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,7168,0.013778133193651834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,16384,0.026073600848515826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,65536,0.09621866544087729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,10240,0.018261333306630455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,8192,0.015187199910481772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,12288,0.020777599016825358
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,6144,0.012517333030700684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,5120,0.01114453375339508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,3584,0.009513599673906963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,3072,0.008193066716194153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,4096,0.009909333785374959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,128,0.004699733356634776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,256,0.004859733581542969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,2048,0.006975999971230824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,2560,0.00807360013326009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,1536,0.006780800223350525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,1024,0.005846400062243144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,512,0.005172266562779745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,768,0.005467733244101206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,10240,0.01808746655782064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,12288,0.020552533864974975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,8192,0.014728533228238425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,6144,0.012427733341852824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,16384,0.025972266991933186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,3072,0.008198399841785432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,3584,0.009393067161242167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,7168,0.013517866532007853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,5120,0.011106133460998535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,65536,0.09220053354899088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,2048,0.007002666592597961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,2560,0.00804373323917389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,4096,0.009821866949399311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,1024,0.005835733314355215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,1536,0.006717866659164429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,16384,0.02586666742960612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,768,0.005446400245030721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,512,0.005141333242257436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,256,0.004833066463470459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,128,0.004692266881465912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,65536,0.09089279969533284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,12288,0.02014933427174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,10240,0.017364267508188883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,4096,0.009764267007509868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,8192,0.01460693379243215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,7168,0.013485866785049438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,3072,0.008195200065771738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,2560,0.007926400005817413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,5120,0.011113599936167399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,6144,0.012335999806722005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,3584,0.00941439966360728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,2048,0.006981333096822103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,1536,0.0067893331249554946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,512,0.005076266825199127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,256,0.004826666911443075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,1024,0.005781333148479462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,8192,0.014510933558146158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,12288,0.019817600647608437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,768,0.005438933273156484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,65536,0.09012693564097086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,128,0.004605866471926371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,7168,0.01344106694062551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,16384,0.02499413291613261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,5120,0.011126400033632914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,4096,0.009769599636395771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,3584,0.009356799721717834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,10240,0.017234132687250773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,3072,0.008178133269151051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,6144,0.012282666563987733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,1536,0.006693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,1024,0.0057536001006762184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,512,0.005095466474692027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,128,0.004582400123278299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,2560,0.007918933530648549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,2048,0.006949333349863689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,768,0.005407999952634176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,256,0.004782933493455251
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,16384,0.025013333559036253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,12288,0.01979093352953593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,10240,0.017298134167989095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,7168,0.013430399696032205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,5120,0.01107413371404012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,4096,0.009701333443323771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,65536,0.08957546552022298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,3584,0.009223467111587525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,8192,0.014472533265749613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,6144,0.012337066729863485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,3072,0.008116266628106435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,1536,0.006713599960009258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,2048,0.006857599814732869
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,256,0.004862933357556661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,512,0.005019733309745788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,768,0.005377066632111868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,2560,0.007818666597207386
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,1024,0.00584853341182073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,128,0.004589866598447164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,5120,0.10055039723714192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,4096,0.08139413197835287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,7168,0.13445332845052083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,6144,0.11421866416931152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,8192,0.15434773763020831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,10240,0.18623359998067218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,3584,0.07233920097351074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,2048,0.048485334714253744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,2560,0.05506346623102824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,1536,0.03927786747614543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,3072,0.06299413442611694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,1024,0.03075733383496602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,12288,0.22159999211629233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,256,0.01746666630109151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,128,0.015929599603017174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,768,0.028033065795898437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,512,0.020709333817164104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,8192,0.04506453275680542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,7168,0.03882879813512166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,6144,0.03343040148417155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,10240,0.051729067166646325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,16384,0.29865919748942055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,12288,0.06411946614583333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,16384,0.08191146850585937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,4096,0.02544426719347636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,2048,0.01635199983914693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,5120,0.029663999875386555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,3072,0.020771199464797975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,1536,0.013910399874051413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,1024,0.011622400085131327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,3584,0.02344320019086202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,256,0.007426133255163829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,128,0.006858666737874349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,2560,0.01957226594289144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,512,0.008772266904513042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,768,0.01045973300933838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,8192,0.039074134826660153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,16384,0.06845973332722982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,6144,0.029020800193150835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,10240,0.04386346737543742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,12288,0.0516480008761088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,7168,0.032865067323048905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,3072,0.017859200636545815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,3584,0.02002560098965963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,5120,0.025541333357493083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,2560,0.01548906664053599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,4096,0.021870932976404824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,2048,0.013543466726938883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,512,0.00719893326361974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,1536,0.01171946624914805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,1024,0.009666132926940917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,128,0.005919999877611796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,768,0.008624000350634257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,256,0.006516266862551372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,65536,0.3017173449198405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,8192,0.03669333457946777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,16384,0.06452906529108683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,7168,0.0310208002726237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,10240,0.04085973501205444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,12288,0.047967998186747234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,6144,0.026821333169937133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,3584,0.018538665771484376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,65536,0.26486080487569175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,4096,0.019716266791025797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,5120,0.02373866637547811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,3072,0.016339199741681416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,2560,0.014383999506632486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,2048,0.012892799576123557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,1536,0.010869333148002624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,1024,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,768,0.0076885332663853955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,512,0.006790400048096975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,256,0.006065066655476888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,128,0.0054730668663978575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,12288,0.04312320152918498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,10240,0.03617493311564128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,16384,0.05793493191401163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,6144,0.02435093323389689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,7168,0.027130667368570966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,65536,0.2437983989715576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,8192,0.03211306730906169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,3072,0.01532906691233317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,4096,0.018167465925216675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,3584,0.01755733291308085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,5120,0.021194666624069214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,2048,0.012242133418718975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,2560,0.013874133427937826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,1536,0.010626133282979329
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,256,0.006371200084686279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,1024,0.008359466989835102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,128,0.005770666897296906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,768,0.007784533500671387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,512,0.006931200126806895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,65536,0.21570132573445638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,12288,0.043622398376464845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,7168,0.026971733570098876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,10240,0.03658986488978068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,8192,0.031013333797454835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,16384,0.05810879866282145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,6144,0.02340266704559326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,5120,0.021321600675582884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,4096,0.017794134219487508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,3584,0.016689066092173258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,3072,0.014599466323852539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,2560,0.012931199868520102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,1536,0.010005333026250203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,2048,0.011542399724324543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,1024,0.007991466422875721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,768,0.007019733389218648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,512,0.00645653357108434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,256,0.005717333157857259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,128,0.005306666592756907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,65536,0.21830719312032065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,12288,0.040668801466623945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,16384,0.05583039919535319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,5120,0.020501333475112914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,8192,0.031070933739344282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,7168,0.026280534267425538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,6144,0.02243306636810303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,10240,0.03492586612701416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,2560,0.012521599729855856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,4096,0.017102932929992674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,3584,0.015666133165359496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,3072,0.013993600010871887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,1024,0.007761066655317943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,1536,0.009460266431172688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,2048,0.011019733548164368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,768,0.006852266689141591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,512,0.006138666470845541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,65536,0.21172800064086914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,256,0.005550933380921682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,128,0.005172266562779745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,8192,0.029270400603612263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,12288,0.03874986569086711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,10240,0.03351893424987793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,16384,0.053020799160003663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,6144,0.02220906615257263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,7168,0.025576533873875935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,5120,0.019335466623306274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,4096,0.016081066926320393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,3584,0.015191466609636942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,3072,0.01376106639703115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,2560,0.012010666728019714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,768,0.006705066561698914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,1536,0.00902933379014333
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,2048,0.010478933652242024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,1024,0.007523199915885926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,65536,0.1992736021677653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,256,0.005613866448402405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,512,0.0060586666067441305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,128,0.0051242664456367494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,10240,0.030781867106755574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,12288,0.036209066708882645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,7168,0.023178666830062866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,16384,0.04685759941736857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,6144,0.02061333258946737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,8192,0.026961066325505573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,5120,0.01809813380241394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,3072,0.013109333316485085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,4096,0.01572266618410746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,3584,0.014283733566602072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,2560,0.0114656001329422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,1536,0.008550399541854858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,2048,0.010010666648546855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,65536,0.17764479319254559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,1024,0.0071285332242647815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,256,0.0055744002262751256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,768,0.006664533416430156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,512,0.006021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,128,0.005111466844876607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,12288,0.03442026774088542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,16384,0.04677120049794515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,8192,0.02626986702283223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,10240,0.027960532903671266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,7168,0.022073600689570108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,6144,0.019393066565195717
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,5120,0.01653439998626709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,3584,0.013927466670672097
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,2048,0.00936853289604187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,3072,0.012598400314648947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,2560,0.010729599992434185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,4096,0.014693333705266317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,65536,0.16272640228271484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,1536,0.008169599870840708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,1024,0.006916266679763794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,512,0.00589333325624466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,768,0.0064064001043637585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,256,0.00543146679798762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,128,0.005100800096988678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,8192,0.026708267132441205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,10240,0.0300053338209788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,12288,0.03536320130030314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,16384,0.04951680103937785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,7168,0.023231999079386393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,2560,0.01112000048160553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,6144,0.020486400524775187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,5120,0.01792959968249003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,65536,0.18162026405334472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,4096,0.015338666240374246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,3584,0.014062933127085366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,3072,0.012507733702659608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,2048,0.009468799829483033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,1536,0.008362666765848795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,1024,0.00714026689529419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,256,0.005353599786758423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,768,0.006414933502674103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,512,0.0059008002281188965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,128,0.004958933095137278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,12288,0.033600000540415446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,8192,0.02486613392829895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,10240,0.0280021329720815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,6144,0.018481065829594932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,16384,0.04350933233896891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,7168,0.02090239922205607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,5120,0.01607360045115153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,2560,0.009706667065620423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,65536,0.15881919860839844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,4096,0.013758933544158936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,3584,0.013302399714787801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,3072,0.011611732840538024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,2048,0.008689066767692566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,1536,0.00764160007238388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,1024,0.006840533514817555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,768,0.00609493354956309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,512,0.005724800129731497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,128,0.004952533543109894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,256,0.005284266670544943
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,12288,0.03223680059115092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,10240,0.026927999655405682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,16384,0.04279040098190308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,8192,0.02442453304926554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,7168,0.02093013326327006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,6144,0.018574933211008705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,5120,0.016795732577641807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,65536,0.16756374041239422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,4096,0.013767466942469279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,3584,0.012774399916330972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,3072,0.011433600385983785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,2560,0.010386133193969726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,2048,0.008986666798591614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,1536,0.00810346653064092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,1024,0.0069248000780741375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,768,0.0062496001521746315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,512,0.005670399963855743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,256,0.005288533369700114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,16384,0.03642666737238566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,128,0.004878933231035868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,12288,0.02900586724281311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,10240,0.023895466327667238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,65536,0.13384426434834798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,8192,0.0220085342725118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,7168,0.018313600619633993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,6144,0.0160970667997996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,5120,0.014630400141080222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,3584,0.011429333686828613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,4096,0.011827199657758077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,3072,0.009893332918485005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,1536,0.007221333185831706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,2560,0.009077333410580953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,256,0.005041066805521647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,2048,0.008152533570925396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,1024,0.006346666812896728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,768,0.005773866673310598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,512,0.0054624001185099285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,128,0.00487253318230311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,8192,0.01768959959348043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,16384,0.03138773242632548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,12288,0.02360853354136149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,10240,0.02001813252766927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,7168,0.014946132898330688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,6144,0.012847999731699625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,5120,0.011283199985822041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,3072,0.00844266712665558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,2560,0.008075733482837678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,4096,0.009794132908185323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,3584,0.0095551997423172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,2048,0.007207466661930085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,65536,0.10714986324310302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,768,0.005566933254400889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,128,0.0046847999095916745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,256,0.004969599843025208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,1536,0.006720000008742015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,1024,0.005993600189685822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,512,0.0052490666508674625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,8192,0.015135999520619711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,7168,0.01337493360042572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,16384,0.02667093276977539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,12288,0.02113706668217977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,10240,0.018246400356292724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,5120,0.010684800148010255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,65536,0.09964799880981445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,6144,0.012054399649302164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,3072,0.00817920019229253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,3584,0.009431466460227966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,4096,0.009249066313107807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,512,0.005099733173847198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,2048,0.007006933291753133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,2560,0.007924266656239827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,1536,0.006631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,128,0.004646400113900503
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,1024,0.005829333265622457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,768,0.005464533468087515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,16384,0.02494186758995056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,256,0.004906666775544485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,8192,0.014518400033315023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,12288,0.019387733936309815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,10240,0.016713599363962807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,7168,0.013291733463605246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,6144,0.01202239990234375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,5120,0.010481066505114238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,4096,0.0092031995455424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,65536,0.0868671973546346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,3584,0.009168000022570292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,2560,0.007866666714350382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,1536,0.006634666522343953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,1024,0.00572266678015391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,768,0.0054175997773806255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,512,0.005060266455014547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,3072,0.008162133395671844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,2048,0.006860800087451935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,128,0.004655999938646952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,12288,0.01922773321469625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,10240,0.016593066851298015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,256,0.0048650667071342465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,6144,0.011872000495592753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,7168,0.013288533687591553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,5120,0.010513066252072652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,16384,0.024088533719380696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,65536,0.08598079681396484
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,4096,0.009170132875442504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,8192,0.014377599954605103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,3584,0.009103999535242716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,2048,0.006904533505439759
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,2560,0.00785813331604004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,1536,0.006594133377075195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,1024,0.005773866673310598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,512,0.005067733426888784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,256,0.004759466648101807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,3072,0.008101333181063335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,768,0.005323733389377594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,65536,0.08524906635284424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,10240,0.016658133268356322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,128,0.004565333326657614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,12288,0.019236266613006592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,6144,0.011895466844240825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,16384,0.024100265900293984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,3584,0.009060266613960265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,8192,0.014441600441932679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,7168,0.013217066725095114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,1536,0.006666666766007741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,5120,0.010506666700045268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,2048,0.006795733173688252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,4096,0.00912000040213267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,1024,0.0057770664493242896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,512,0.005054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,3072,0.008082133531570435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,2560,0.007673599819342296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,256,0.004705066482226053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,128,0.004555733501911163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,768,0.00535999983549118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,4096,0.08837546507517496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,5120,0.10949119726816814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,7168,0.146834135055542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,6144,0.12738880316416423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,8192,0.16519360542297362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,10240,0.2034869352976481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,3072,0.06899200280507406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,1536,0.039102931817372635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,2560,0.059759998321533205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,3584,0.07768106460571289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,768,0.02736639976501465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,2048,0.048224000136057536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,1024,0.03039146661758423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,256,0.01710933248202006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,12288,0.2430880069732666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,512,0.02066133419672648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,128,0.015539200107256571
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,7168,0.04146133263905843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,8192,0.04842880169550578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,10240,0.05662293434143066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,12288,0.06742186546325683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,16384,0.3069589296976725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,6144,0.0362496018409729
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,16384,0.08827093442281088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,4096,0.026386133829752606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,5120,0.032153600454330446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,1536,0.013942399621009826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,2048,0.016706132888793947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,2560,0.019964800278345744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,3072,0.021639466285705566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,3584,0.024117332696914674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,768,0.010441600282986959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,512,0.008717866738637288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,256,0.007446399827798207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,128,0.006746666630109151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,1024,0.0114656001329422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,12288,0.05912426710128784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,8192,0.040797865390777587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,6144,0.03142506678899129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,16384,0.0775754690170288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,7168,0.035854931672414145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,10240,0.048438398043314616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,4096,0.02323413292566935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,2560,0.016999467213948568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,2048,0.014408533771832785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,3072,0.018756266434987387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,5120,0.02752853234608968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,3584,0.021485867102940877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,1536,0.012227200468381246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,1024,0.00995306670665741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,768,0.008777599533398945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,512,0.007281066477298736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,128,0.005849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,256,0.006489600241184235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,65536,0.31009066899617516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,8192,0.03833386500676473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,10240,0.04550826549530029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,12288,0.05488640069961548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,6144,0.03020693262418111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,7168,0.0341813325881958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,16384,0.07230186462402344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,5120,0.025422932704289754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,2048,0.013725866874059042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,65536,0.2842165311177572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,3072,0.018412800629933675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,2560,0.015613866845766702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,4096,0.021618133783340453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,1536,0.011378133296966552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,3584,0.020245333512624107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,768,0.007928533355395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,1024,0.009293867150942485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,512,0.006981333096822103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,256,0.006071466704209646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,128,0.005549866457780202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,16384,0.0620746652285258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,12288,0.04699519872665405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,65536,0.26643412907918296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,10240,0.040601599216461184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,7168,0.029975465933481854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,6144,0.026610134045283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,8192,0.033421866099039715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,4096,0.01953386664390564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,3072,0.015910399953524272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,5120,0.022707200050354003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,2048,0.012498133381207784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,3584,0.018685867389043175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,2560,0.014254933595657349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,1536,0.010851200421651204
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,1024,0.008910933136940002
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,768,0.007921066880226136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,256,0.006262399752934774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,512,0.00703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,128,0.005842133363087972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,65536,0.2343594710032145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,8192,0.03493333260218302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,10240,0.04229653278986613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,16384,0.06662400166193644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,12288,0.049685335159301756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,7168,0.031310933828353885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,6144,0.02728319962819417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,4096,0.01981226603190104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,5120,0.023341866334279378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,3584,0.018232532342274985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,3072,0.016383999586105348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,2560,0.014458666245142618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,2048,0.01279146671295166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,1536,0.010538666447003683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,1024,0.008326399823029835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,256,0.0058037335673968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,768,0.0071285332242647815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,512,0.0065311998128890995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,128,0.005363200108210246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,65536,0.24518399238586425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,16384,0.0616320013999939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,12288,0.045269334316253663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,7168,0.029624533653259278
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,10240,0.040938667456309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,8192,0.03362773259480794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,5120,0.02286826570828756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,4096,0.019126399358113607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,6144,0.026310400168100996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,2048,0.012194133798281352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,3584,0.017435733477274576
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,3072,0.01585919956366221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,768,0.0070165331164995836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,2560,0.013914666573206582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,1536,0.010232533017794292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,1024,0.00804799993832906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,512,0.006322133541107178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,256,0.005555200080076853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,128,0.005202133456865946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,65536,0.24233387311299642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,6144,0.025013333559036253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,7168,0.028338134288787842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,16384,0.06254186630249023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,12288,0.04699840148289998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,5120,0.02232426603635152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,10240,0.03944426774978638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,8192,0.03274346590042114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,1536,0.009635200103123982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,3072,0.015524267156918844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,3584,0.01681386629740397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,4096,0.018768000602722167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,2048,0.012039466698964437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,65536,0.22252267201741538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,2560,0.013994666934013366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,1024,0.008025600016117096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,768,0.006877866884072621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,512,0.006185600161552429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,256,0.005509333312511444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,128,0.005173333485921224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,12288,0.039612801869710286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,10240,0.03347520033518474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,16384,0.05399253368377686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,7168,0.0262773334980011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,6144,0.02304533322652181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,8192,0.02802773316701253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,3072,0.01420799990495046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,5120,0.019523199399312338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,4096,0.016565333803494772
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,1024,0.007579733431339264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,3584,0.015543466806411744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,2560,0.012338133653004964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,2048,0.01029866635799408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,1536,0.008888533711433411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,65536,0.2103018601735433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,128,0.0052149335543314615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,768,0.006820266445477803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,512,0.006257066627343495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,256,0.005560533205668131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,8192,0.027959465980529785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,16384,0.04979093472162883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,7168,0.024489599466323852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,12288,0.03834773302078247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,10240,0.03163413405418396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,5120,0.01919999917348226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,6144,0.022823466857274374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,3584,0.015253333250681558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,4096,0.01672853430112203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,3072,0.013727999726931252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,2560,0.011720533172289532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,2048,0.010571733117103577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,65536,0.1852490743001302
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,1536,0.008563199639320373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,1024,0.007190399865309398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,768,0.006922666728496551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,512,0.0060597335298856105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,128,0.0052149335543314615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,256,0.005468800167242686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,10240,0.03745599985122681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,16384,0.05561813513437906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,12288,0.042770131429036455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,6144,0.0241482675075531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,8192,0.02987733284632365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,7168,0.027082665761311846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,5120,0.020351999998092653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,3584,0.016434133052825928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,4096,0.017795199155807497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,3072,0.014804266889890037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,2560,0.01260693371295929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,65536,0.21420159339904785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,768,0.006579199930032094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,256,0.005378133555253347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,2048,0.010503466924031575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,1536,0.009033600489298504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,1024,0.0075882668296496075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,512,0.006045866509278615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,12288,0.03911360104878743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,128,0.005065600077311197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,6144,0.023056000471115112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,10240,0.03330559929211934
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,16384,0.05077226559321085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,8192,0.028388265768686933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,7168,0.024743467569351196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,65536,0.18616426785786946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,2560,0.011303466558456422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,5120,0.018917334079742432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,4096,0.01632426679134369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,768,0.0062730665008227035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,3072,0.012645333011945089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,3584,0.015129599968592325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,512,0.0058005332946777345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,2048,0.009764267007509868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,1536,0.008154666423797608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,1024,0.0072522665063540145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,256,0.005266133447488149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,10240,0.03095039923985799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,128,0.004937600096066793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,8192,0.02723520000775655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,7168,0.02445440093676249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,12288,0.03717653354008992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,16384,0.04999786615371704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,5120,0.017777067422866822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,6144,0.021758933862050377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,4096,0.015506133437156677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,3072,0.012872533003489176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,3584,0.014380799730618796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,65536,0.19547947247823078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,768,0.0065749332308769224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,2560,0.01139520009358724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,1536,0.008468266328175862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,2048,0.009649067123730978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,1024,0.0071946665644645694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,10240,0.02831253409385681
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,512,0.0058890665570894875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,256,0.00527999997138977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,128,0.005025066435337067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,8192,0.02359573245048523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,16384,0.04269013404846191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,12288,0.033086933692296344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,65536,0.15628587404886882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,7168,0.02133973240852356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,2560,0.010207999746004741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,6144,0.019143466154734293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,4096,0.01393066644668579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,5120,0.01590720017751058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,3072,0.011799466609954835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,1024,0.006612266600131989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,3584,0.012725333372751871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,768,0.006018133461475372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,2048,0.008699733018875121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,256,0.005058133105436961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,1536,0.007613866527875264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,512,0.005547733108202616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,8192,0.0205567995707194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,7168,0.016217600305875143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,128,0.004923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,16384,0.03563520113627116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,10240,0.02353066603342692
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,65536,0.12598506609598797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,12288,0.026371200879414875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,5120,0.012681600451469422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,4096,0.010670933127403259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,3584,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,2560,0.008616532882054646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,6144,0.016292267044385276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,512,0.005314133564631144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,3072,0.00941973328590393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,2048,0.0075989335775375364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,1536,0.006810666620731353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,768,0.0056650668382644655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,1024,0.006021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,256,0.004935466746489207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,16384,0.03014506697654724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,128,0.004750933249791463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,65536,0.10831999778747559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,5120,0.011426132917404175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,10240,0.01921280026435852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,12288,0.023005867004394533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,3584,0.009326933821042379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,8192,0.015523200233777365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,7168,0.013933866222699483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,6144,0.012834133704503379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,4096,0.009697066744168599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,2560,0.007975466549396515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,1536,0.006647466619809468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,2048,0.007053866485754649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,512,0.0051466668645540874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,3072,0.008328533172607422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,128,0.004705066482226053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,1024,0.005796266595522562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,768,0.0053962667783101406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,256,0.004846933484077454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,10240,0.01667733391125997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,65536,0.08891092936197917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,12288,0.019338667392730713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,16384,0.025884799162546795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,6144,0.011585066715876263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,5120,0.01051520009835561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,3584,0.009179733196894328
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,8192,0.014313600460688271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,7168,0.013078400492668152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,4096,0.009192533294359843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,1024,0.005726933479309082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,768,0.005453866720199585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,512,0.0051584000388781226
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,3072,0.008130133152008057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,2560,0.007754666606585185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,2048,0.006809600194295247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,1536,0.006566399832566579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,16384,0.023821866512298583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,10240,0.016568533579508462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,65536,0.08402667045593262
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,8192,0.01418880025545756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,256,0.0048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,6144,0.011569066842397054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,128,0.004675200084845225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,12288,0.01876586675643921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,7168,0.013010133306185404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,3584,0.00904746651649475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,2560,0.007773866752783458
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,3072,0.007991466422875721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,2048,0.0067445332805315655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,1536,0.006470400094985962
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,1024,0.005679999788602194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,4096,0.00918506681919098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,5120,0.01048533320426941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,768,0.005351466437180838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,128,0.004572799801826477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,256,0.004762666424115499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,512,0.005127466718355815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,16384,0.023705599705378215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,12288,0.018758400281270345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,10240,0.016590933005015053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,6144,0.011541333794593812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,5120,0.01051200032234192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,65536,0.08373653093973796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,3072,0.007950933277606964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,8192,0.014229333400726319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,7168,0.013062399625778199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,2560,0.007629866898059845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,2048,0.006724266707897187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,4096,0.009105066458384197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,3584,0.008884267012278239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,1024,0.005774933099746704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,512,0.005086933573087057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,768,0.0052714665730794275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,256,0.004785066843032837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,128,0.004577066500981649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,1536,0.006517333288987477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,4096,0.07285333474477132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,5120,0.08945173422495524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,6144,0.10455573399861653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,7168,0.12042026519775391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,8192,0.13605119387308756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,10240,0.16770985921223958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,12288,0.19837759335835775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,3072,0.05751039981842041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,3584,0.06514666477839151
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,2048,0.04044160048166911
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,2560,0.048979198932647704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,1024,0.026900267601013182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,1536,0.03359466791152954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,256,0.013931733369827271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,16384,0.26612799962361655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,512,0.017663999398549398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,128,0.012421333789825439
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,768,0.022679466009140014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,5120,0.027689599990844728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,7168,0.03582079807917277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,8192,0.04319253365198771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,10240,0.04780266682306926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,6144,0.03169493277867635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,12288,0.056075731913248696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,16384,0.07258559862772623
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,3072,0.019427200158437095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,2048,0.014629333217938741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,4096,0.023523199558258056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,1024,0.010279466708501179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,2560,0.01734506686528524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,3584,0.021859200795491536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,1536,0.012628266215324402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,256,0.0061930666367212926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,128,0.005751466751098633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,512,0.007447466750939687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,768,0.009174399574597676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,12288,0.04390506744384766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,65536,0.26776212056477866
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,16384,0.05688746770222982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,6144,0.02499413291613261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,7168,0.028467200199762982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,8192,0.03149333397547404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,10240,0.03784213463465373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,3072,0.015449600418408713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,4096,0.020177066326141357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,5120,0.022436267137527464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,1536,0.01027413308620453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,3584,0.017433599630991618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,2560,0.013730133573214212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,2048,0.011920000116030376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,128,0.0051146666208903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,512,0.00600853314002355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,256,0.0054175997773806255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,768,0.007699200014273326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,1024,0.008634666601816814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,65536,0.20690132776896158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,16384,0.048987734317779544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,6144,0.02203413248062134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,12288,0.038414935270945236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,7168,0.024858667453130086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,10240,0.03330133358637492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,8192,0.028050132592519122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,5120,0.02099840044975281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,4096,0.016596266627311708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,3072,0.013635200262069703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,1536,0.009318400422732036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,2560,0.01216319998105367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,2048,0.0107424000898997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,1024,0.008138666550318401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,3584,0.015131733814875283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,65536,0.1783445358276367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,768,0.006392533580462138
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,512,0.005671466886997223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,256,0.005190399785836538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,128,0.0049333333969116214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,12288,0.03226133386294047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,16384,0.04141120115915935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,5120,0.01667520006497701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,6144,0.01906026601791382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,7168,0.02132693330446879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,8192,0.023755733172098795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,10240,0.029708800713221233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,4096,0.014251733819643656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,3584,0.013433600465456644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,2048,0.009937066833178203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,3072,0.011917866269747416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,1024,0.00697920024394989
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,2560,0.011034666498502096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,1536,0.00895360012849172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,65536,0.14812587102254232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,768,0.006018133461475372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,512,0.0056991999348004665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,256,0.005323733389377594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,128,0.0051136001944541935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,12288,0.030500266949335737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,10240,0.028358399868011475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,16384,0.03830080032348633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,8192,0.02245866656303406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,7168,0.020170666774113975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,6144,0.017806933323542277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,5120,0.01590079963207245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,3072,0.01118293305238088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,4096,0.013354667027791343
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,3584,0.012262399991353352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,2560,0.010487467050552368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,1536,0.008313600222269695
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,2048,0.009488000472386678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,1024,0.006400000055631001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,65536,0.13722559611002605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,512,0.005407999952634176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,768,0.005659733215967814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,256,0.005054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,128,0.004835199813048045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,12288,0.028085333108901978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,16384,0.03520319859186809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,10240,0.02611733277638753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,6144,0.01634986698627472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,7168,0.018525866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,5120,0.014442666371663412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,8192,0.020914133389790854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,4096,0.012377599875132244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,3584,0.011453866958618164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,3072,0.010566400488217671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,2560,0.009786666433016459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,65536,0.12408959865570068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,1536,0.007609599828720092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,2048,0.00885653297106425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,1024,0.005976533393065134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,512,0.005222400029500326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,768,0.00550186683734258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,256,0.0048885335524876915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,128,0.00468800018231074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,12288,0.026293333371480303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,10240,0.02285439968109131
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,16384,0.03283413251241048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,6144,0.014944000045458474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,8192,0.01943040092786153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,7168,0.017061332861582436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,5120,0.013471999764442444
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,4096,0.011734400192896526
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,3072,0.010063999891281128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,3584,0.011044266819953918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,65536,0.11513066291809082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,2560,0.009392000238100688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,1536,0.007202133536338806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,2048,0.008538666367530822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,1024,0.005807999769846598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,768,0.00544106662273407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,256,0.004902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,512,0.005172266562779745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,128,0.004746666550636292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,12288,0.02184106707572937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,16384,0.027113600571950273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,10240,0.019063466787338258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,7168,0.014258133371671042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,6144,0.01288746694723765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,8192,0.01604693333307902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,5120,0.011819733182589214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,3072,0.009168000022570292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,4096,0.010435199737548828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,65536,0.09456106821695963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,3584,0.010029866298039754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,1024,0.005594666798909505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,2048,0.007049599786599477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,2560,0.008738133311271667
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,1536,0.006588799754778545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,512,0.005165866514046987
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,128,0.004721066852410635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,768,0.0054506664474805195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,256,0.0048885335524876915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,12288,0.020914133389790854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,10240,0.018161066373189292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,16384,0.025973333915074663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,8192,0.015421866377194723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,7168,0.013708800077438354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,6144,0.012391466895739238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,5120,0.011353600025177001
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,65536,0.08824319839477539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,4096,0.009991466999053955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,3584,0.00983679989973704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,3072,0.008904533584912618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,2048,0.007037866612275441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,1024,0.005590400099754334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,2560,0.008152533570925396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,1536,0.00644053320089976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,768,0.005406933526198069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,256,0.004894933104515076
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,512,0.0051818668842315676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,128,0.004647466540336609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,12288,0.0217141330242157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,10240,0.018947199980417887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,16384,0.027885866165161134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,8192,0.01644266645113627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,6144,0.012889599800109864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,7168,0.014503467082977294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,5120,0.011830400427182515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,65536,0.09679360389709472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,3072,0.008794666330019633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,4096,0.010522666573524474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,3584,0.00978773335615794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,2560,0.007517866790294647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,1536,0.006097066899140676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,2048,0.006846933563550313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,1024,0.005612800021966299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,768,0.005310933291912079
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,512,0.004986666639645894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,256,0.004730666677157084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,128,0.004679466784000397
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,12288,0.020897066593170165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,16384,0.027060266335805255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,10240,0.01844586730003357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,6144,0.012733866771062216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,8192,0.01595200002193451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,7168,0.01423679987589518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,5120,0.01178666651248932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,65536,0.09418346881866455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,4096,0.010342400272687275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,3072,0.00811839997768402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,2560,0.007502933343251546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,3584,0.00909546713034312
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,2048,0.006798933446407318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,1536,0.006056533257166544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,1024,0.005669333537419637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,768,0.005251200000445048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,256,0.004729599754015604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,512,0.005043200155099233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,128,0.004539733131726583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,12288,0.018304000298182167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,16384,0.02271359960238139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,8192,0.014055466651916504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,10240,0.016218666235605875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,65536,0.07826879819234213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,7168,0.012667733430862426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,5120,0.010496000448862713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,6144,0.011490133404731751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,4096,0.008987733721733093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,3072,0.00767680009206136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,3584,0.009005866448084513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,2560,0.007238399982452392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,2048,0.006387199958165486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,1024,0.0054506664474805195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,768,0.005195733408133189
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,1536,0.006162133316198985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,512,0.0049450665712356566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,256,0.004783999919891357
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,128,0.004604800045490265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,16384,0.022124799092610677
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,12288,0.01809813380241394
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,10240,0.015889066457748412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,65536,0.07698559761047363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,8192,0.01386666695276896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,6144,0.011310933033625285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,7168,0.012552533547083536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,5120,0.009921066959698995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,4096,0.00856213370958964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,3072,0.007494399944941203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,3584,0.008398933211962382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,2560,0.007207466661930085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,2048,0.006366933385531108
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,1536,0.006061866879463196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,768,0.005155199766159057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,1024,0.005379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,128,0.004498133560021719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,256,0.004727466901143392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,512,0.0049002667268117275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,16384,0.02175040046374003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,12288,0.01805866758028666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,8192,0.0132832000652949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,10240,0.015731199582417806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,65536,0.07641386985778809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,7168,0.012001066406567892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,6144,0.010431999961535137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,5120,0.009620267152786254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,4096,0.008364799618721008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,3072,0.007341866691907247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,3584,0.008190933366616566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,2048,0.00628053347269694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,2560,0.007020799815654755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,1536,0.006004266440868378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,1024,0.005336533486843109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,768,0.005053866902987162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,512,0.0047658666968345646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,128,0.004530133306980133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,256,0.0045514668027559916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,16384,0.021985065937042237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,65536,0.0759488026301066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,12288,0.017926400899887084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,10240,0.015373866756757101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,8192,0.013194666306177775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,6144,0.01039466659228007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,7168,0.011659733454386393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,5120,0.009460266431172688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,4096,0.008438400427500407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,3072,0.007340799768765767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,3584,0.008215466638406117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,2560,0.00703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,2048,0.006286933521429698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,768,0.005046399931112925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,1024,0.005269333223501841
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,1536,0.006031999985376993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,512,0.004837333162625631
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,256,0.004556799928347269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,128,0.004468266665935516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,16384,0.02182506720225016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,8192,0.012804266810417176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,12288,0.017246933778127034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,10240,0.014903466900189719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,65536,0.07547840277353922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,7168,0.011520000298817952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,6144,0.010312533378601075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,5120,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,4096,0.008277333279450735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,3584,0.008218666911125183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,3072,0.007344000041484833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,2560,0.007045333087444305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,2048,0.006217599908510844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,1536,0.005947733422120413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,768,0.005077333251635233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,1024,0.005283200244108836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,512,0.0047423998514811196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,256,0.0046079998215039575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,128,0.004385066529115042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,16384,0.020843732357025146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,65536,0.07404266993204753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,12288,0.01691626707712809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,10240,0.01486186683177948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,8192,0.0127402663230896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,7168,0.011547733346621196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,6144,0.010368000467618306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,5120,0.00937493344148
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,4096,0.0083146666487058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,3072,0.0073173334201176955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,3584,0.008130133152008057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,2560,0.006937600175539653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,2048,0.0061941335598627726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,1536,0.006025599936644236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,1024,0.005253333350022634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,768,0.004973866542180379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,512,0.0047882666190465295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,128,0.004340266684691111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,256,0.004524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,65536,0.07457173665364583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,16384,0.020859734217325846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,12288,0.01692906618118286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,10240,0.014843733112017313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,8192,0.012810666362444559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,7168,0.01146666705608368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,5120,0.009408000111579894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,6144,0.010327466328938802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,3072,0.0072736000021298725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,4096,0.008319999774297078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,3584,0.00817386656999588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,2560,0.006984533369541168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,2048,0.0061951999862988796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,1024,0.005277866621812185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,1536,0.005939200023810069
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,768,0.004999466737111409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,256,0.004531200230121613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,512,0.004732800026734671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,128,0.004353066782156626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,4096,0.07242026329040527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,5120,0.0887221336364746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,6144,0.1037333329518636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,7168,0.11935466925303143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,3584,0.06455359856287637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,8192,0.135641606648763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,1024,0.02632853388786316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,10240,0.16641920407613117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,2560,0.04853973388671875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,3072,0.056518399715423585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,12288,0.19686293601989746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,1536,0.033037867148717245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,2048,0.04025706847508748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,256,0.013715199629465737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,768,0.021499733130137123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,512,0.017509333292643228
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,128,0.01204373339811961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,16384,0.2653834660847982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,12288,0.05495573282241821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,7168,0.03522986570994059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,6144,0.03134613235791524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,8192,0.03939733505249023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,16384,0.07172266642252603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,10240,0.047253334522247316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,5120,0.029363199075063066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,1536,0.01251413325468699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,2560,0.01718506614367167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,2048,0.01460906664530436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,3072,0.019217065970102944
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,1024,0.01030293305714925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,3584,0.021538132429122926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,4096,0.023171200354894003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,768,0.009224533041318258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,256,0.00625493327776591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,512,0.007088000078996022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,128,0.005766400198141734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,10240,0.037405868371327713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,12288,0.04304853280385335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,8192,0.03099626700083415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,16384,0.05583146810531616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,6144,0.025068799654642742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,7168,0.028128000100453694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,2048,0.011868799726168316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,5120,0.022116265694300332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,4096,0.018923733631769815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,2560,0.013637333114941915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,3584,0.01712533235549927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,3072,0.015313067038853965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,1536,0.0102101335922877
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,1024,0.008756267031033833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,65536,0.27393598556518556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,768,0.007546666761239369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,512,0.0060479998588562015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,256,0.005491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,128,0.005186133086681366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,8192,0.027507199843724565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,7168,0.024731733401616416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,6144,0.021944532791773476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,10240,0.032604799667994185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,12288,0.040484265486399336
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,16384,0.0486954649289449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,65536,0.20699946085611978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,2048,0.0107232004404068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,5120,0.01952000061670939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,4096,0.016555733482042947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,2560,0.012237866719563801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,3072,0.013455999890963235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,3584,0.015057067076365152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,1536,0.009742933511734008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,1024,0.008004266520341237
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,768,0.00639466643333435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,512,0.0059114664793014525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,256,0.005267199873924255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,128,0.0049685334165891016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,12288,0.03158400058746338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,10240,0.027644799153010054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,8192,0.023269333442052207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,16384,0.041716265678405764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,7168,0.02101973295211792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,6144,0.01906879941622416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,65536,0.17499626477559407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,3584,0.01346879998842875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,1536,0.009422933061917622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,5120,0.016771199305852254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,3072,0.012009599804878235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,2560,0.010999466975529988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,4096,0.014403200149536133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,2048,0.00983786682287852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,1024,0.0068896000583966565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,256,0.0054730668663978575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,768,0.006423466900984447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,512,0.005906133353710175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,128,0.0051136001944541935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,65536,0.14545812606811523
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,12288,0.029792000850041706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,8192,0.022247467438379923
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,16384,0.039084800084431964
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,10240,0.026268800099690754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,7168,0.020122667153676353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,6144,0.017685333887736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,5120,0.016670932372411094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,1024,0.006546133259932201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,4096,0.013427199920018515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,3584,0.01225279966990153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,3072,0.01104213297367096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,2560,0.010410666465759277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,2048,0.009342933694521587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,1536,0.00819946676492691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,128,0.004850133260091146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,512,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,768,0.006140799820423126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,256,0.005141333242257436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,65536,0.13509119351704915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,16384,0.03610026836395264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,12288,0.027543467283248902
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,10240,0.024297600984573363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,8192,0.02075200080871582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,7168,0.018338133891423546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,6144,0.016344533363978068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,5120,0.015692800283432007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,4096,0.012590932846069335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,3584,0.011487999558448791
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,2560,0.009709866841634114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,3072,0.010514133175214132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,2048,0.008846933643023174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,1536,0.007612800101439159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,1024,0.006203733384609222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,65536,0.12525973320007325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,768,0.005809066692988077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,256,0.0049674664934476215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,512,0.005422933399677277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,128,0.004726399978001913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,12288,0.02576853235562642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,6144,0.01498240033785502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,10240,0.02410986622174581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,16384,0.03406613270441691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,8192,0.01920959949493408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,4096,0.011852799852689107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,5120,0.01337386667728424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,7168,0.017375999689102174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,3584,0.010957866907119751
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,3072,0.01015786627928416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,2048,0.008499200145403545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,2560,0.009448533256848652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,1536,0.006897066533565521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,768,0.005678933362166087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,1024,0.00613973339398702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,65536,0.11764480272928875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,512,0.005356800059477488
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,256,0.005006933212280273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,128,0.004759466648101807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,12288,0.022278400262196858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,4096,0.010827733079592387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,16384,0.030186667044957476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,10240,0.019874133666356406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,7168,0.014852266510327658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,8192,0.017488000790278117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,5120,0.01234773298104604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,6144,0.01360106666882833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,3584,0.010339200496673584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,3072,0.009302399555842082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,2560,0.00879253347714742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,512,0.005351466437180838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,65536,0.10263253053029378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,1536,0.006630399823188781
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,2048,0.007403733332951863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,1024,0.005949866771697998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,768,0.0058890665570894875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,256,0.005030400057633718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,128,0.004741333425045013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,8192,0.015264000495274863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,7168,0.014153599739074707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,12288,0.020542933543523153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,10240,0.017813332875569663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,16384,0.028328533967336016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,6144,0.01304746667544047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,5120,0.011392000317573547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,4096,0.010270933310190838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,3584,0.009706667065620423
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,3072,0.008982400099436443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,2560,0.008297599852085114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,65536,0.0930346647898356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,2048,0.007142400244871776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,1536,0.006539733211199443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,1024,0.005801600217819214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,768,0.005577600002288819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,512,0.005238399902979533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,256,0.004914133250713349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,128,0.004637866715590159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,12288,0.02190720041592916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,16384,0.02910826603571574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,10240,0.019215999046961467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,8192,0.016090666254361473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,7168,0.014841600259145101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,6144,0.013212800025939941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,5120,0.011981866757074992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,4096,0.010712533195813497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,65536,0.10168320337931316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,3072,0.00874773363272349
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,3584,0.010014933347702027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,2560,0.007937066753705342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,2048,0.007208533088366191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,1536,0.006316799918810527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,1024,0.005832533538341522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,768,0.005422933399677277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,512,0.005138133466243744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,128,0.004691199958324432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,256,0.004806399842103322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,12288,0.020941867431004842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,16384,0.027140265703201293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,10240,0.01824000080426534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,8192,0.01556373337904612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,7168,0.014598400394121806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,6144,0.012943999965985617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,5120,0.011582932869593303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,65536,0.0952298641204834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,4096,0.010260267059008281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,3584,0.009498666723569233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,3072,0.008310399949550629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,2560,0.007284266750017802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,2048,0.006779733300209046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,1536,0.006135466694831848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,1024,0.005716266731421152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,768,0.005345066885153452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,512,0.0051242664456367494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,256,0.004841599861780802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,128,0.0045162667830785114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,8192,0.014121599992116294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,12288,0.019803732633590698
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,16384,0.025281065702438356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,10240,0.01665173371632894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,6144,0.012058666348457337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,65536,0.08626133600870768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,7168,0.012758400042851767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,5120,0.01055999994277954
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,4096,0.009083732962608337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,3584,0.008571733037630718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,3072,0.007853866616884867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,2560,0.007413333157698314
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,2048,0.006929066777229309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,1536,0.006232533355553945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,1024,0.005613866448402405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,768,0.005336533486843109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,512,0.005107200145721436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,256,0.004863999783992767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,128,0.004590933521588644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,16384,0.022268799940745036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,12288,0.01779200037320455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,10240,0.01548479994138082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,8192,0.013352533181508383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,7168,0.01241600016752879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,65536,0.07590186595916748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,6144,0.01128000020980835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,5120,0.00985706647237142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,4096,0.00848640004793803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,3584,0.008213333288828532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,3072,0.007447466750939687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,2560,0.007112533350785573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,2048,0.006435200075308482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,1536,0.005983999868233999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,768,0.005212800204753875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,1024,0.005342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,512,0.004994133114814758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,256,0.0047872001926104225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,128,0.004578133424123129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,16384,0.021758933862050377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,12288,0.017618133624394735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,65536,0.0743018627166748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,10240,0.01527679959932963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,8192,0.012854400277137756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,7168,0.011755733688672384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,6144,0.010548266768455505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,5120,0.009348266323407491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,3072,0.00729066679875056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,4096,0.008321066697438557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,3584,0.008052266637484233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,2560,0.00695360004901886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,2048,0.00626986672480901
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,1536,0.00597866674264272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,1024,0.005285333096981049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,768,0.005096533397833506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,512,0.0048543999592463175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,256,0.004669866462548574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,128,0.004435199995835622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,12288,0.017157334089279174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,10240,0.01471466620763143
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,16384,0.02183893322944641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,8192,0.012459733088811238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,7168,0.01134933332602183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,65536,0.07385173638661703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,6144,0.010339200496673584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,5120,0.009272533655166625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,4096,0.008307200173536937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,3584,0.008130133152008057
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,3072,0.007243733108043671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,2560,0.006975999971230824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,1536,0.005975466469923655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,2048,0.006278400123119354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,1024,0.005262933174769084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,12288,0.016834133863449098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,512,0.0048213332891464235
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,768,0.005015466610590617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,256,0.004554666578769684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,128,0.004489600161711375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,10240,0.014282666643460593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,16384,0.02152000069618225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,65536,0.07330346902211507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,8192,0.012213333447774252
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,7168,0.011358933647473653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,6144,0.01030293305714925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,5120,0.00936853289604187
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,4096,0.008272000153859456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,3584,0.008138666550318401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,2560,0.006903466582298279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,3072,0.007241599758466085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,1536,0.0059125334024429325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,2048,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,768,0.005002666513125102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,1024,0.005246933301289876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,512,0.0047658666968345646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,256,0.0045162667830785114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,128,0.004385066529115042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,65536,0.0724842627843221
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,16384,0.020883200565973918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,12288,0.01649386684099833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,10240,0.014300800363222756
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,8192,0.0122079998254776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,7168,0.011288533608118694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,6144,0.010310399532318115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,5120,0.009246933460235595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,4096,0.008231466511885326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,3584,0.008025600016117096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,2560,0.006875733534495036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,3072,0.007163733243942261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,2048,0.006247466802597046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,1024,0.005201066533724466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,1536,0.005894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,768,0.005026133358478546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,512,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,256,0.004503466685612996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,128,0.0043605332573254905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,12288,0.01649386684099833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,16384,0.020787199338277183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,65536,0.07256426811218261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,10240,0.014252799749374389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,8192,0.012205866972605388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,7168,0.011281067132949829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,4096,0.008227199812730153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,6144,0.010310399532318115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,5120,0.009257599711418152
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,3584,0.008011733492215473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,3072,0.007136000196139018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,2560,0.00695360004901886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,2048,0.006270933151245117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,1536,0.0059456000725428265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,512,0.00469760000705719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,1024,0.005211733281612396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,768,0.005073066552480062
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,256,0.004456533491611481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,128,0.004320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,4096,0.07242240111033121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,6144,0.10356799761454265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,5120,0.0893077294031779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,7168,0.11851413249969482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,8192,0.13609280586242675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,10240,0.16573012669881185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,3072,0.05646080176035563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,2048,0.0395850658416748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,3584,0.06406933466593424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,2560,0.04801599979400635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,1536,0.03252480030059814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,12288,0.19658773740132648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,1024,0.025649066766103106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,768,0.021105066935221354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,512,0.016826667388280234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,128,0.01171946624914805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,256,0.01354986627896627
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,16384,0.2700501441955566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,6144,0.030868266026179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,8192,0.039324800173441574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,12288,0.05462506612141928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,7168,0.03498986562093099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,10240,0.04758933385213216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,16384,0.07522453467051188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,5120,0.029000532627105714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,4096,0.0228928009668986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,1024,0.010110933581988018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,3584,0.021410133441289267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,2048,0.014269866545995078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,1536,0.01311360001564026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,3072,0.019181867440541588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,2560,0.01694613297780355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,512,0.007496533294518788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,768,0.00904746651649475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,256,0.00636053333679835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,128,0.005731200178464254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,12288,0.04465173482894898
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,10240,0.038541865348815915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,8192,0.032585599025090534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,16384,0.06057386795679728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,7168,0.02845226724942525
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,6144,0.025875200827916462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,5120,0.022345600525538127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,2048,0.012494933605194092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,2560,0.013964800039927163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,4096,0.019285333156585694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,3072,0.015915733575820924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,3584,0.01768959959348043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,256,0.005548800031344095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,1536,0.01032960017522176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,65536,0.2711935997009277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,768,0.0076330666740735366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,512,0.006343466540177663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,1024,0.008613333106040955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,128,0.005026133358478546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,10240,0.0352021336555481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,7168,0.026800000667572023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,6144,0.023484800259272257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,8192,0.030634667476018267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,12288,0.0421887993812561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,16384,0.0563754677772522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,65536,0.2123413403828939
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,5120,0.020562134186426797
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,3072,0.014141866564750671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,4096,0.01809920072555542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,2560,0.013219199577967324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,3584,0.016194132963816325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,1024,0.008150400221347808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,2048,0.010994133353233338
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,1536,0.009829333424568177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,512,0.006247466802597046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,768,0.006762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,256,0.005300266544024149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,128,0.004850133260091146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,8192,0.027477333943049114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,10240,0.03235093355178833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,7168,0.02424853245417277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,12288,0.037749334176381426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,65536,0.19451093673706055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,16384,0.050843731562296546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,6144,0.021283199389775596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,3072,0.013436800241470337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,5120,0.019411200284957887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,2048,0.01069546639919281
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,4096,0.016500266393025716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,3584,0.015132799744606018
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,2560,0.01229866643746694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,768,0.006738133231798808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,256,0.005534933507442474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,1536,0.009492266178131103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,1024,0.007614933451016744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,512,0.006087466577688853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,128,0.005163733164469401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,65536,0.1705311934153239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,12288,0.03752426703770955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,8192,0.02637973427772522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,10240,0.031063467264175415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,16384,0.04749759833017985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,7168,0.02407039999961853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,6144,0.020746666193008422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,5120,0.018599466482798258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,4096,0.015774933497111003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,3584,0.014739200472831726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,2560,0.011553066968917846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,3072,0.012717866897583007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,2048,0.01067626674969991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,1536,0.008797867099444072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,1024,0.007477333148320516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,256,0.0051818668842315676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,768,0.006320000191529592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,512,0.005745066702365876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,65536,0.17126827239990233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,128,0.004759466648101807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,12288,0.03488213221232096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,7168,0.02243306636810303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,16384,0.04488319953282674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,10240,0.029725867509841918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,8192,0.02528426647186279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,4096,0.014957867066065469
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,5120,0.01737706661224365
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,6144,0.020118399461110433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,3584,0.014131200313568116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,3072,0.012356266379356384
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,2560,0.011142399907112122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,2048,0.00974826713403066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,1536,0.008689066767692566
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,1024,0.006856533388296763
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,768,0.00625493327776591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,512,0.005619200070699056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,65536,0.17190613746643066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,256,0.005020800232887268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,128,0.004770133395989736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,10240,0.02807146708170573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,12288,0.03348480065663655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,16384,0.045238399505615236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,8192,0.024241065979003905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,6144,0.018999467293421425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,5120,0.017395200332005818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,7168,0.02178986668586731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,3584,0.013148799538612366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,4096,0.014334932963053385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,1024,0.00665280024210612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,3072,0.012053333719571431
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,65536,0.16372159322102864
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,2560,0.01141866644223531
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,2048,0.009893332918485005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,128,0.004653866589069367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,1536,0.00782719999551773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,768,0.006001066664854685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,512,0.00561706672112147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,256,0.005021866659323374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,7168,0.02058239976565043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,10240,0.02639893293380737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,16384,0.04264533519744873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,6144,0.0180949330329895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,12288,0.03102186719576518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,8192,0.02304960091908773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,5120,0.015176533659299215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,4096,0.01350826621055603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,3584,0.012732799847920737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,3072,0.011550933122634888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,2560,0.010614400108655293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,2048,0.009241599837938945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,1536,0.007477333148320516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,65536,0.1458687941233317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,1024,0.006450133522351582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,768,0.006043733159701029
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,256,0.0050346667567888895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,512,0.005550933380921682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,128,0.0047423998514811196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,16384,0.03608959913253784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,7168,0.01880319913228353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,6144,0.016361600160598753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,4096,0.012759466965993246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,8192,0.020610133806864418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,10240,0.024308266242345174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,12288,0.028246400753657024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,5120,0.014717866977055868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,3584,0.012661332885424295
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,3072,0.010964266459147136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,2560,0.00981333355108897
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,1024,0.006217599908510844
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,2048,0.008214400211970011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,65536,0.12418453693389893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,768,0.005850666761398315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,1536,0.0071839998165766404
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,512,0.005412266651789347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,256,0.005041066805521647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,12288,0.029976532856623335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,128,0.004647466540336609
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,16384,0.04027200142542521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,10240,0.025406932830810545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,8192,0.022233599424362184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,7168,0.01954666574796041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,6144,0.017410133282343546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,5120,0.015146666765213012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,4096,0.013191466530164083
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,3584,0.01223360002040863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,3072,0.011103999614715577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,2560,0.009769599636395771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,2048,0.008295466502507527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,1536,0.007645866771539052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,256,0.004827733337879181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,65536,0.1490752061208089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,512,0.0053727999329566956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,1024,0.006205866734186808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,768,0.0057536001006762184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,128,0.0046069333950678505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,12288,0.026829866568247478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,6144,0.015819733341534935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,8192,0.019882667064666747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,10240,0.023248000939687093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,16384,0.036266668637593584
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,5120,0.013808000087738036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,7168,0.0177130659421285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,3072,0.009724799791971843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,4096,0.012167466680208842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,3584,0.011223466197649638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,2560,0.008682666222254436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,65536,0.12699413299560547
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,2048,0.007629866898059845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,1536,0.006900266806284587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,1024,0.006123733520507812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,768,0.005541333556175232
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,512,0.005222400029500326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,256,0.004862933357556661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,128,0.004612266520659129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,16384,0.03688746690750122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,12288,0.02688960035641988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,10240,0.022859734296798707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,6144,0.016244266430536905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,8192,0.019851734240849815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,4096,0.011819733182589214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,7168,0.018216532468795777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,65536,0.12132480144500732
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,5120,0.01437226633230845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,1024,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,3072,0.01033066709836324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,3584,0.011060266693433126
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,2560,0.009215999643007915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,2048,0.007996800045172375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,1536,0.007534933090209961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,768,0.005769599974155426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,512,0.005291733145713806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,256,0.004829866687456766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,128,0.004677333434422811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,16384,0.029407999912897748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,12288,0.0228767991065979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,65536,0.10266346931457519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,10240,0.019789866606394448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,8192,0.017177599668502807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,7168,0.014681599537531533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,6144,0.013592533270517983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,2560,0.007995733122030894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,5120,0.011969066659609477
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,1536,0.006532266736030579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,4096,0.009991466999053955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,3584,0.00939626693725586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,3072,0.008659199873606364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,512,0.005130666494369507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,2048,0.007090133428573608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,1024,0.005794133245944977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,768,0.005379199981689453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,16384,0.025150932868321735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,10240,0.016572800278663636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,256,0.004875733455022176
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,128,0.0045162667830785114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,65536,0.08750826517740885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,5120,0.009832533200581868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,12288,0.018787199258804323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,8192,0.013602133591969809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,7168,0.011932800213495891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,3072,0.007482666770617168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,2560,0.007333333293596904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,6144,0.010919466614723206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,1536,0.005997866888840993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,4096,0.008575999736785888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,3584,0.008238933483759562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,2048,0.006717866659164429
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,1024,0.005355733136336008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,16384,0.02162453333536784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,512,0.004857600231965383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,768,0.005153066913286845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,128,0.004444799820582072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,256,0.004666666686534882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,12288,0.01714986761411031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,65536,0.07552533149719239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,10240,0.014632532993952433
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,8192,0.012286933263142903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,3584,0.008108800152937572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,5120,0.00918826659520467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,7168,0.011241599917411804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,6144,0.010190932949384054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,4096,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,3072,0.00720000018676122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,2560,0.006954666475454967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,1024,0.005302399893601735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,1536,0.005909333129723867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,2048,0.00621013343334198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,768,0.005096533397833506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,512,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,256,0.004567466676235199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,16384,0.021217066049575805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,128,0.004470400015513102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,6144,0.010089600086212158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,12288,0.016301866372426352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,8192,0.012059733271598816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,10240,0.01409173309803009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,7168,0.011115733782450359
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,65536,0.07156053384145102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,5120,0.009178666273752849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,4096,0.008183466891447704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,2560,0.0068256000677744556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,2048,0.006131199995676676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,3584,0.007961600025494894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,3072,0.007154133419195812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,65536,0.07041493256886801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,1536,0.005815466741720835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,1024,0.005230933427810669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,768,0.005022933085759481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,512,0.004724266628424326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,256,0.004517333209514618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,128,0.004370133578777313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,6144,0.010136533776919048
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,16384,0.020386133591334024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,5120,0.009127466877301534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,4096,0.008124800026416778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,3584,0.007948799928029378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,12288,0.015931733449300132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,10240,0.014057599504788718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,7168,0.011160533626874287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,8192,0.012061867117881774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,512,0.004694400231043497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,768,0.00496319979429245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,3072,0.007190399865309398
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,2560,0.0067775999506314594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,2048,0.006161066889762879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,1536,0.00588266650835673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,1024,0.00516480008761088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,256,0.004491733511288961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,65536,0.07062186400095621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,128,0.004331733286380768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,10240,0.014016000429789224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,16384,0.020304000377655028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,12288,0.01597119967142741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,8192,0.012103466192881267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,7168,0.011110400160153706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,6144,0.010132267077763876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,5120,0.009118933478991191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,3072,0.007076266904671986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,4096,0.0081194669008255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,3584,0.007979733248551685
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,1024,0.005253333350022634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,512,0.004706133405367533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,2560,0.006854400038719177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,2048,0.0061141331990559895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,1536,0.005790933469931285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,256,0.004418133199214936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,768,0.004997333387533823
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,128,0.004323199888070424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,4096,0.06890239715576171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,5120,0.08501333395640055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,6144,0.10017706553141277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,7168,0.11509439945220948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,8192,0.1319967985153198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,10240,0.16218986511230468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,2560,0.04611839850743611
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,12288,0.19258133570353192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,3072,0.05350293318430582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,3584,0.06152106523513794
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,1536,0.03052373329798381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,2048,0.037996800740559895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,1024,0.023779199520746867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,128,0.00876586635907491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,256,0.011442133784294128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,16384,0.25631252924601233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,512,0.015381333231925965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,768,0.018553600708643595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,5120,0.026105600595474242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,16384,0.0695583979288737
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,7168,0.035979731877644854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,6144,0.02975040078163147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,8192,0.03803733189900716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,12288,0.053158398469289145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,10240,0.04570666551589966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,3072,0.01798400084177653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,1024,0.009291733304659527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,4096,0.02187626759211222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,1536,0.011435733238855997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,2048,0.013309866189956665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,2560,0.01604586640993754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,3584,0.02039573391278585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,256,0.005362133185068766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,768,0.008274133503437042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,128,0.004896000027656555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,512,0.0059690664211908976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,12288,0.04278826713562012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,16384,0.055941331386566165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,6144,0.024666666984558105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,8192,0.030791467428207396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,7168,0.027437865734100342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,10240,0.037381335099538164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,65536,0.26099093755086267
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,5120,0.021527467171351115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,2048,0.011622400085131327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,4096,0.018628267447153728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,2560,0.013754666845003764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,3072,0.015106133619944253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,3584,0.017038933436075845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,1536,0.010694400469462077
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,1024,0.008236800134181977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,512,0.005538133283456167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,256,0.005153066913286845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,768,0.007218133409818013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,128,0.0048981333772341405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,65536,0.20756905873616538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,16384,0.04697599808375041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,6144,0.021592533588409422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,7168,0.024158932765324912
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,8192,0.026681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,10240,0.032551467418670654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,12288,0.03707093397776286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,5120,0.01994880040486654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,4096,0.016242133577664693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,3072,0.013317333658536276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,2048,0.010739200313886007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,3584,0.014988799889882406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,2560,0.01218666632970174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,1536,0.009481599926948548
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,1024,0.007866666714350382
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,768,0.006054399907588959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,512,0.005486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,256,0.00517546683549881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,128,0.004846933484077454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,65536,0.17726720174153646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,16384,0.03975040117899577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,12288,0.0312885324160258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,10240,0.02643199960390727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,6144,0.018467199802398682
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,5120,0.017180800437927246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,8192,0.022789333264033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,7168,0.0206496000289917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,3072,0.011550933122634888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,3584,0.013009066383043924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,4096,0.013792000214258828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,2560,0.0108842670917511
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,1024,0.006449066599210103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,2048,0.009608532985051472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,1536,0.00864533285299937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,65536,0.1412234624226888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,768,0.0056618665655454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,512,0.005425066749254862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,256,0.005048533280690512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,128,0.004830933113892873
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,12288,0.028511999050776164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,16384,0.03637760082880656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,10240,0.02621226708094279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,8192,0.02081813414891561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,6144,0.01693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,5120,0.015018666783968607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,7168,0.01889386574427287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,4096,0.012920533617337545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,3584,0.012089600165685017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,3072,0.011087999741236369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,2560,0.010193066795667012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,2048,0.008986666798591614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,1536,0.008280533552169799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,1024,0.006053333481152853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,65536,0.1283744017283122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,768,0.005773866673310598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,512,0.005412266651789347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,128,0.0048543999592463175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,256,0.005103999873002371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,12288,0.025845332940419512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,16384,0.03215893308321635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,10240,0.023747199773788454
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,8192,0.019316265980402626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,5120,0.013794133067131042
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,7168,0.017462400595347087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,6144,0.015367466211318969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,4096,0.011991467078526814
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,3584,0.011204266548156738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,3072,0.010312533378601075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,2560,0.009619200229644775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,1024,0.005976533393065134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,2048,0.008642133076985676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,65536,0.11250346501668293
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,1536,0.007896533111731212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,768,0.0056970665852228795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,512,0.005342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,256,0.005032533407211303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,128,0.004690133531888326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,10240,0.020549333095550536
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,12288,0.025169066588083905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,16384,0.029368533690770464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,7168,0.015895467003186545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,5120,0.01279146671295166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,6144,0.014259200294812521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,8192,0.017656532923380534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,4096,0.011225600043932598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,3072,0.009826133648554485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,3584,0.010590933760007222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,65536,0.09664533138275147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,2048,0.008206933240095774
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,2560,0.009377066294352214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,1024,0.005983999868233999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,1536,0.007117866476376851
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,512,0.005293866495291392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,256,0.004966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,128,0.00472320020198822
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,768,0.005492266515890757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,16384,0.027102933327356978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,12288,0.02182719906171163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,10240,0.01907306710879008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,6144,0.0132832000652949
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,5120,0.012153599659601848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,7168,0.014365866780281067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,8192,0.016309332847595216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,65536,0.09150613149007161
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,4096,0.010821333527565003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,3072,0.009457066655158997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,3584,0.010422399640083313
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,2048,0.007419733206431071
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,1024,0.005824000140031179
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,2560,0.008926933010419209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,1536,0.0065994665026664736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,512,0.005178666611512502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,768,0.00547733356555303
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,256,0.004984533290068308
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,128,0.004701866706212362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,10240,0.01801813244819641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,12288,0.02090239922205607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,16384,0.026743467648824053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,8192,0.015878400206565856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,7168,0.014763733744621277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,6144,0.013531733552614847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,5120,0.012044800321261089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,65536,0.09034026463826497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,4096,0.010407466689745586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,3072,0.009422933061917622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,3584,0.010225066542625427
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,2048,0.007309866448243459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,2560,0.008594133456548055
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,1024,0.005874133110046387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,1536,0.006718933085600535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,768,0.0054848000407218935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,256,0.004995200037956238
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,128,0.004762666424115499
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,512,0.005167999863624572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,10240,0.017946666479110716
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,16384,0.025487999121348064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,12288,0.020473599433898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,7168,0.014151466886202493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,6144,0.012900267044703165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,8192,0.0153546671072642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,5120,0.011548800269762675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,65536,0.08474666277567545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,4096,0.010431999961535137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,3584,0.010079999764760334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,3072,0.008892800410588582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,2048,0.007002666592597961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,1536,0.006705066561698914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,1024,0.005780266722043356
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,2560,0.008436266581217449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,768,0.0054730668663978575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,512,0.005187200009822845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,256,0.004935466746489207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,128,0.004686933259169261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,12288,0.020039467016855876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,10240,0.01757226586341858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,8192,0.015355733036994935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,16384,0.025187200307846068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,6144,0.012876799702644348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,7168,0.014147200187047324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,5120,0.011643733580907185
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,65536,0.08344639937082926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,4096,0.010191999872525533
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,3072,0.008529067039489746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,3584,0.009664000074068705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,2048,0.007036800185839335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,2560,0.007957333326339721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,1536,0.0066549330949783325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,1024,0.00582826683918635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,768,0.005514666438102722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,512,0.005177600185076395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,256,0.004948266843954722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,128,0.004655999938646952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,12288,0.019911466042200725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,10240,0.017423999309539796
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,16384,0.02480640014012655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,8192,0.015069866180419922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,65536,0.08356160322825114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,6144,0.012748799721399941
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,7168,0.01386240025361379
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,5120,0.011276800433794658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,3072,0.0081194669008255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,4096,0.00979306697845459
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,3584,0.00928000013033549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,2560,0.007829333345095318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,2048,0.006981333096822103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,1536,0.006539733211199443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,1024,0.0057087997595469155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,768,0.005419733126958212
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,512,0.005208533505598704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,256,0.004947199920813243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,128,0.004690133531888326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,16384,0.024503467480341594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,12288,0.019705599546432494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,10240,0.017259732882181803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,8192,0.014935466647148132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,65536,0.08273066679636637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,7168,0.013697066903114319
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,6144,0.012442666292190551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,5120,0.010733866691589355
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,4096,0.009286399682362874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,3584,0.009062400460243225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,3072,0.008083199958006541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,2560,0.0077567999561627705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,2048,0.006886399785677592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,1536,0.006622933348019918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,1024,0.005785599847634633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,768,0.0053962667783101406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,512,0.005186133086681366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,128,0.0046069333950678505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,256,0.004852266609668731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,16384,0.02453333338101705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,12288,0.019665066401163736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,65536,0.08241386413574218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,10240,0.017113600174585977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,8192,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,7168,0.012970667084058127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,6144,0.011762133240699768
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,5120,0.010479999581972758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,4096,0.009288533528645834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,3584,0.009085866808891296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,3072,0.00809279978275299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,2048,0.006926933427651723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,2560,0.007685333490371704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,1024,0.005724800129731497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,768,0.005435733497142792
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,1536,0.006604800124963124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,512,0.005206400156021118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,256,0.004903466502825419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,128,0.004655999938646952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,16384,0.024264534314473472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,65536,0.08241813182830811
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,10240,0.016809600591659545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,12288,0.019294933478037516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,8192,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,7168,0.01295360028743744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,6144,0.011677866180737812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,5120,0.01046720047791799
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,4096,0.009226666887601216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,3072,0.008075733482837678
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,2048,0.006910933554172516
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,2560,0.0076906666159629825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,3584,0.00902186632156372
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,1536,0.006651733318964641
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,1024,0.005745066702365876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,768,0.005385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,512,0.005054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,128,0.004621866842110952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,256,0.004822400212287903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,16384,0.02403306762377421
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,12288,0.019036799669265747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,65536,0.08214826583862304
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,10240,0.016658133268356322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,8192,0.013965866963068643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,7168,0.012840533256530761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,6144,0.011653332908948263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,5120,0.010414933164914448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,4096,0.009195733070373534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,3584,0.009054932991663616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,3072,0.008011733492215473
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,2048,0.006919466455777486
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,2560,0.007717333237330119
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,1536,0.006587733328342438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,1024,0.0057322666049003605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,768,0.00528106689453125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,256,0.00488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,512,0.005049600203831991
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,128,0.004578133424123129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,65536,0.08177920182545981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,16384,0.023587199052174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,12288,0.01872319976488749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,10240,0.016295466820398966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,8192,0.013941333691279093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,7168,0.012885333100954691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,6144,0.011620266238848369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,5120,0.010392533739407857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,4096,0.009172266721725464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,3584,0.008986666798591614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,3072,0.00804906686147054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,2048,0.006851199766000111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,2560,0.007710933188597361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,1536,0.0065738668044408154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,1024,0.005696000158786773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,768,0.005252266426881155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,512,0.005048533280690512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,256,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,128,0.004509866734345754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,65536,0.08123413721720377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,16384,0.0234389324982961
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,12288,0.018734933932622273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,10240,0.01626240015029907
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,8192,0.01392213304837545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,7168,0.012805333733558655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,6144,0.011611732840538024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,5120,0.010420266787211101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,4096,0.009190400441487629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,3584,0.008874666690826417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,3072,0.008026666442553202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,2560,0.00766293356815974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,2048,0.006782933572928111
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,1536,0.00652266691128413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,1024,0.005709866682688395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,768,0.005260799825191498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,512,0.005025066435337067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,256,0.004745600124200186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,128,0.004682666560014089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,4096,0.06853973070780436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,6144,0.09952639738718669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,5120,0.08461333115895589
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,7168,0.11453759670257568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,8192,0.1320736010869344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,10240,0.1615562597910563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,2560,0.045544532934824626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,3584,0.06106453339258829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,3072,0.053350400924682614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,2048,0.038327467441558835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,1024,0.02335360050201416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,1536,0.031729066371917726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,12288,0.19294613202412922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,256,0.011024000247319539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,768,0.01835413376490275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,128,0.008573866883913676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,512,0.01495680014292399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,16384,0.25586986541748047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,6144,0.029752532641092937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,12288,0.057506132125854495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,7168,0.03342399994532268
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,8192,0.037707734107971194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,10240,0.045360000928243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,16384,0.06944639682769775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,5120,0.02760853370030721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,3584,0.01985599994659424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,2048,0.013343999783198038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,1536,0.01130239963531494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,2560,0.015890133380889893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,3072,0.017862399419148765
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,4096,0.021652267376581828
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,1024,0.009213866790135701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,768,0.008180266618728638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,512,0.005946666498978933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,128,0.005002666513125102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,256,0.005383466680844625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,6144,0.024421334266662598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,12288,0.042979200681050614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,10240,0.036505599816640213
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,16384,0.05512640078862509
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,7168,0.02767146627108256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,8192,0.030426667133967085
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,2048,0.011498666803042094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,5120,0.021702400843302407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,2560,0.013505066434542337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,3584,0.01676373283068339
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,3072,0.01488746702671051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,4096,0.018334933121999106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,1536,0.010066133737564088
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,1024,0.008168533444404602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,65536,0.2566741307576498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,768,0.006963199873765309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,256,0.00521066685517629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,512,0.005525333185990652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,128,0.0048991998036702475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,10240,0.0319541335105896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,6144,0.021591466665267945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,7168,0.02402346730232239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,12288,0.03900693257649739
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,16384,0.047498667240142824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,65536,0.20881813367207847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,8192,0.026259199778238936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,2048,0.011169067025184632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,5120,0.01912320057551066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,2560,0.0120170662800471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,3072,0.013505066434542337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,4096,0.016315733393033348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,3584,0.014854400356610616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,1536,0.00941439966360728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,1024,0.007718400160471599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,768,0.006086400151252747
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,512,0.005504000186920166
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,256,0.0050357331832249965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,128,0.004785066843032837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,16384,0.038500265280405684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,12288,0.031113600730895995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,10240,0.02699306607246399
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,7168,0.020154666900634766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,65536,0.17596267064412435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,8192,0.022990934054056802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,6144,0.018244266510009766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,2048,0.009517866373062133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,3584,0.012904533743858337
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,3072,0.01165120005607605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,5120,0.016062933206558227
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,2560,0.011256532867749532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,1536,0.008523733417193095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,4096,0.013801599542299906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,1024,0.0064533332983652755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,768,0.005849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,256,0.005015466610590617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,512,0.0054400001962979635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,128,0.004710400104522705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,65536,0.1409418741861979
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,8192,0.020555732647577922
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,16384,0.03622719844182332
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,12288,0.02823999921480815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,6144,0.016979199647903443
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,7168,0.01880319913228353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,10240,0.024554665883382162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,5120,0.015783466895421348
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,1536,0.00817920019229253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,4096,0.01244160036245982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,3072,0.010990933577219645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,3584,0.011942399541536967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,2560,0.010043733318646749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,2048,0.00885759989420573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,1024,0.006011733412742614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,768,0.005712000032265982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,512,0.005341866612434387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,256,0.005054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,128,0.004710400104522705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,65536,0.12622400124867755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,12288,0.025512532393137617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,7168,0.01728746692339579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,8192,0.01891626715660095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,16384,0.03213653365770976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,10240,0.02234986623128255
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,6144,0.015339733163515726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,5120,0.014478933811187745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,4096,0.011898666620254517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,3072,0.010193066795667012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,2048,0.008646399776140849
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,3584,0.011205333471298217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,2560,0.009664000074068705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,1536,0.0077674667040507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,1024,0.006052266558011373
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,65536,0.1121450662612915
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,768,0.005459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,512,0.005276800195376078
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,256,0.005043200155099233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,128,0.00468800018231074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,12288,0.023140267531077067
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,6144,0.01418773333231608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,16384,0.02914560039838155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,5120,0.012622933586438498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,7168,0.015410133202870688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,8192,0.017303466796875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,10240,0.02193173368771871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,3584,0.01071573297182719
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,4096,0.011178666353225708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,3072,0.00972266693909963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,2048,0.008054399987061818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,1536,0.006985599795977275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,2560,0.009267200032869976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,1024,0.005906133353710175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,65536,0.09899093310038248
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,128,0.0047658666968345646
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,768,0.005532800157864889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,512,0.00522986650466919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,256,0.00487253318230311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,12288,0.021741867065429688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,16384,0.02676266630490621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,10240,0.019723733266194664
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,8192,0.015639467040697734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,7168,0.014699733257293702
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,6144,0.01325653294722239
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,5120,0.012113066514333089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,2560,0.008918399612108868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,4096,0.010819199681282043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,3072,0.009476266304651896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,3584,0.010389332969983418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,2048,0.0075573335091273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,65536,0.08702826499938965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,768,0.0054400001962979635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,1024,0.005785599847634633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,1536,0.006906666855017345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,512,0.0051594664653142296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,256,0.0049333333969116214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,128,0.004640000065167745
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,12288,0.020546134312947592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,7168,0.01467626690864563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,8192,0.016033066312472026
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,10240,0.018705066045125326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,16384,0.025441066424051924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,6144,0.01325866679350535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,5120,0.012020267049471537
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,4096,0.01034986674785614
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,3584,0.010414933164914448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,3072,0.009249066313107807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,65536,0.08986773490905761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,2560,0.008500267068545024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,2048,0.006915199756622315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,1024,0.005829333265622457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,1536,0.006609066824118296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,768,0.005444266895453135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,512,0.005186133086681366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,256,0.004897066454092661
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,128,0.0046858668327331545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,16384,0.02495786746342977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,8192,0.015127467115720114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,10240,0.017658666769663493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,7168,0.014195199807484946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,12288,0.02034133275349935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,6144,0.012837333480517068
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,5120,0.011691733201344808
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,3584,0.010051199793815612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,4096,0.010284800330797832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,65536,0.08368000189463297
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,3072,0.008919466535250347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,2048,0.007176533341407776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,1536,0.0065984000762303666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,1024,0.005820799867312113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,2560,0.008534399668375652
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,768,0.005446400245030721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,512,0.005187200009822845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,128,0.004613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,256,0.004836266736189524
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,12288,0.020070399840672812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,16384,0.02504533330599467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,10240,0.017522132396697997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,8192,0.015085867047309876
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,6144,0.012728533148765564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,7168,0.013809067010879517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,5120,0.01162559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,3072,0.008540800213813782
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,4096,0.010317867000897724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,65536,0.08287146886189779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,3584,0.009677867094675701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,2560,0.007895466685295106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,2048,0.006929066777229309
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,1536,0.0065087998906771345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,768,0.005401599903901418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,1024,0.005819733440876007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,512,0.0051136001944541935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,256,0.004890666902065277
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,128,0.004621866842110952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,10240,0.017326933145523072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,12288,0.019656533002853395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,7168,0.013846400380134582
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,16384,0.0245685338973999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,8192,0.014906666676203408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,6144,0.012520533800125123
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,5120,0.011320533355077107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,65536,0.0825162649154663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,4096,0.00960213343302409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,3072,0.008348799745241801
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,3584,0.009110400080680847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,2560,0.00780266672372818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,2048,0.0068896000583966565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,1536,0.006451199948787689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,768,0.005448533097902933
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,1024,0.005723733206590017
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,512,0.005145599941412607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,256,0.00487253318230311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,128,0.004593066871166229
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,16384,0.024279467264811196
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,12288,0.019563732544581096
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,10240,0.01715946594874064
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,8192,0.014762666821479798
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,65536,0.08193493684132894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,7168,0.013554132978121438
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,6144,0.012034133076667786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,5120,0.010531199971834819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,3072,0.008086400230725606
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,4096,0.009155199925104777
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,3584,0.009026133020718892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,2560,0.007730133334795634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,2048,0.006828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,1024,0.0057546665271123254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,1536,0.006499200065930684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,768,0.005328000088532766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,512,0.0051466668645540874
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,256,0.00470719983180364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,128,0.0045855998992919925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,16384,0.02415999968846639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,12288,0.019435733556747437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,10240,0.017011199394861856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,65536,0.08167573610941568
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,8192,0.01432319978872935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,7168,0.013101866841316223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,6144,0.011660800377527872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,5120,0.010382933417956035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,4096,0.009219200412432352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,3584,0.009026133020718892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,3072,0.008035199840863545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,2560,0.00767680009206136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,1536,0.006563200056552887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,2048,0.006809600194295247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,768,0.0053610667586326596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,1024,0.005712000032265982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,12288,0.01909760038057963
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,256,0.004849066833655039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,128,0.004543999830881754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,512,0.0051146666208903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,65536,0.08167146841684977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,16384,0.024156800905863442
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,3584,0.00893440047899882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,10240,0.01665386656920115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,8192,0.014275200168291726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,7168,0.012731732924779257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,6144,0.011556266744931539
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,5120,0.010417067011197408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,4096,0.009176533420880635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,3072,0.008078933258851369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,2560,0.007655466596285502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,2048,0.006786133348941803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,1536,0.006554666658242543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,768,0.0053247998158137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,512,0.005076266825199127
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,1024,0.005656533439954122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,256,0.004732800026734671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,128,0.004699733356634776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,65536,0.08138346672058105
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,16384,0.023746132850646973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,12288,0.019004799922307334
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,10240,0.016204800208409628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,8192,0.013854933778444925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,6144,0.011563733220100403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,7168,0.012665599584579468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,5120,0.010365866621335347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,4096,0.00916266640027364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,512,0.00496319979429245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,3584,0.008946133653322856
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,3072,0.007998933394749958
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,2560,0.007652266820271809
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,2048,0.006776533524195353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,1536,0.00651093324025472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,1024,0.0056533331672350565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,768,0.005398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,7168,0.012755200266838074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,256,0.004700799783070883
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,16384,0.023307732741038003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,12288,0.018599466482798258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,128,0.004523733258247375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,10240,0.016170666615168253
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,65536,0.08124480247497559
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,8192,0.01381119986375173
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,6144,0.011555199821790058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,5120,0.010365866621335347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,4096,0.009147733449935913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,3584,0.00885759989420573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,3072,0.007926400005817413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,2560,0.007585066556930542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,1536,0.006439466774463653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,2048,0.0067893331249554946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,1024,0.005731200178464254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,512,0.005008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,256,0.004614399870236715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,768,0.005194666484991709
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,128,0.004534400006135305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,16384,0.023230934143066408
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,65536,0.08045547008514405
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,12288,0.018552533785502114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,10240,0.01617280046145121
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,8192,0.01381226678689321
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,7168,0.012676266829172769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,6144,0.011460266510645549
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,5120,0.010293333729108175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,4096,0.009108266234397889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,2560,0.007648000121116638
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,3584,0.008853333195050557
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,3072,0.0079434668024381
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,2048,0.006763733426729838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,1536,0.006417066852251689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,1024,0.00561706672112147
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,768,0.0052149335543314615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,256,0.004792533318201701
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,512,0.004999466737111409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,128,0.00444160004456838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,4096,0.06846826871236165
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,5120,0.08429546356201172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,6144,0.09971520105997721
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,7168,0.11493546962738037
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,8192,0.13175040086110432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,10240,0.16185493469238282
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,3584,0.06131413380304972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,2560,0.046130132675170896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,3072,0.0528661330540975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,12288,0.19293012619018554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,2048,0.03813440004984538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,1536,0.032280532519022624
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,512,0.014499200383822122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,1024,0.02327359914779663
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,256,0.01137600044409434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,768,0.018219733238220216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,128,0.00925546685854594
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,16384,0.2555840015411377
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,10240,0.045238399505615236
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,6144,0.029258666435877483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,8192,0.037484800815582274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,12288,0.05308800141016642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,7168,0.033370665709177655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,4096,0.021358933051427206
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,16384,0.06951786677042643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,5120,0.027565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,1024,0.00916373332341512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,3584,0.019800533850987755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,2560,0.015837867061297098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,2048,0.013235200444857279
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,3072,0.017534933487574258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,1536,0.011498666803042094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,768,0.008226133386294047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,512,0.006364800035953522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,256,0.005453866720199585
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,128,0.004948266843954722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,7168,0.02751680016517639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,12288,0.043237332503000894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,10240,0.036754135290781656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,8192,0.030409600337346392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,6144,0.02436586618423462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,16384,0.054058667023976645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,5120,0.021528534094492593
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,3584,0.016849066813786825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,2560,0.013467733065287271
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,3072,0.01507306694984436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,4096,0.018093866109848023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,2048,0.011361066500345867
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,1536,0.010021332899729412
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,768,0.0072202667593956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,1024,0.008211199939250947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,65536,0.2583658695220947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,512,0.005683200061321258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,256,0.005129600067933401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,128,0.004825599988301595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,7168,0.024141865968704223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,16384,0.046521600087483725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,6144,0.021805866559346517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,8192,0.02650559941927592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,10240,0.0349727988243103
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,12288,0.03868693510691325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,4096,0.015915733575820924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,65536,0.20729279518127441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,2560,0.012059733271598816
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,5120,0.019179733594258626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,2048,0.01076479951540629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,3584,0.01476800044377645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,1536,0.009405866265296936
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,3072,0.01367573340733846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,512,0.005458133419354757
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,256,0.005026133358478546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,768,0.006145066519578298
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,1024,0.008087466657161712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,128,0.004732800026734671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,7168,0.0204586664835612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,8192,0.02285333275794983
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,12288,0.03146986762682597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,10240,0.026760532458623247
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,16384,0.039895466963450116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,6144,0.018681599696477254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,3584,0.012806399663289388
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,65536,0.1774079958597819
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,4096,0.014086400469144186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,3072,0.011710932850837708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,5120,0.016387200355529784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,2560,0.01148373285929362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,2048,0.009507200121879578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,1536,0.008798933029174805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,1024,0.006697600086530049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,768,0.0059349333246548975
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,512,0.005427200098832448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,128,0.004698666433493296
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,256,0.004981333514054617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,65536,0.13605440457661946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,7168,0.018702934185663857
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,16384,0.035537068049112955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,12288,0.028138667345046997
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,6144,0.017119999726613364
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,10240,0.024821333090464272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,8192,0.02092693249384562
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,5120,0.015794133146603904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,4096,0.012691199779510498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,3584,0.0120170662800471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,3072,0.011018666625022887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,2048,0.008915199836095174
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,2560,0.010420266787211101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,1536,0.008272000153859456
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,1024,0.006021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,768,0.005868799984455109
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,512,0.005389866729577383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,256,0.005061333378156027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,128,0.0046858668327331545
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,65536,0.12589333057403565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,8192,0.01915093262990316
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,12288,0.024858667453130086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,16384,0.03261013428370158
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,10240,0.022210133075714112
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,7168,0.017211733261744182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,5120,0.014435199896494546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,6144,0.015507200360298156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,3072,0.010141866405804952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,4096,0.0118559996287028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,2048,0.008578133583068848
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,3584,0.011203199625015259
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,256,0.004965333143870035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,2560,0.009803733229637146
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,1536,0.007663999994595845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,1024,0.006144000093142191
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,512,0.005253333350022634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,768,0.005706666906674703
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,128,0.004695466657479604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,65536,0.1110752026240031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,12288,0.0228000005086263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,10240,0.020194133122762047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,6144,0.013857066631317139
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,16384,0.028959999481836956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,8192,0.017602133750915527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,7168,0.015794133146603904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,5120,0.013555199901262919
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,4096,0.011092266440391541
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,2048,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,3584,0.010761599739392598
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,1536,0.007154133419195812
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,3072,0.009622400005658466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,2560,0.009177600344022114
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,1024,0.0059008002281188965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,512,0.005206400156021118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,65536,0.09996586640675863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,768,0.0054293334484100345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,256,0.004962133367856344
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,128,0.004541866481304169
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,7168,0.014358400305112203
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,5120,0.012371200323104858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,16384,0.027239465713500978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,6144,0.01362559994061788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,8192,0.016795732577641807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,12288,0.021628799041112264
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,10240,0.01949440042177836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,2048,0.007362133264541626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,4096,0.01045973300933838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,3584,0.01042133371035258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,3072,0.009417600433031718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,2560,0.008974933624267578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,256,0.004791466891765595
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,1536,0.006791466474533081
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,65536,0.08563520113627116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,1024,0.005794133245944977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,768,0.005500799914201101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,512,0.0050687998533248905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,128,0.004598399996757508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,5120,0.012101333340009053
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,8192,0.016292267044385276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,7168,0.014747732877731323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,10240,0.017820799350738527
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,6144,0.012786133090655008
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,12288,0.021588265895843506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,16384,0.02695573369661967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,3584,0.010414933164914448
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,1536,0.0066453332702318835
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,2048,0.00703359991312027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,4096,0.010624000430107116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,65536,0.0895903984705607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,3072,0.009306666254997254
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,2560,0.00863039990266164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,1024,0.00581333339214325
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,768,0.005468800167242686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,512,0.005095466474692027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,256,0.004794666667779287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,128,0.00461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,16384,0.02505706747372945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,10240,0.018017067511876424
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,7168,0.01413653294245402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,12288,0.020154666900634766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,8192,0.015400532881418863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,3072,0.008923733234405517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,4096,0.010345600048700969
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,6144,0.012569600343704223
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,5120,0.01169599990049998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,3584,0.010009599725405376
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,1024,0.005755733450253805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,2560,0.007925333579381307
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,65536,0.08343786398569743
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,2048,0.007046400010585785
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,256,0.004770133395989736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,1536,0.006597333153088887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,768,0.005352533360322317
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,512,0.005132799843947092
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,128,0.004600533346335093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,16384,0.02482453385988871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,7168,0.013809067010879517
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,12288,0.020035199324289956
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,10240,0.017767467101415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,8192,0.01511253317197164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,3072,0.008900266885757447
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,65536,0.08193600177764893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,6144,0.012665599584579468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,5120,0.011595732967058818
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,2048,0.006915199756622315
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,4096,0.010276266932487487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,512,0.005176533261934916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,3584,0.009366400043169658
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,2560,0.008082133531570435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,1536,0.00655680000782013
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,1024,0.005786666770776113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,768,0.005380266904830932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,256,0.00488319993019104
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,12288,0.019643733898798622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,128,0.004539733131726583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,10240,0.017270400126775106
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,8192,0.014878933628400167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,16384,0.024301866690317787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,65536,0.08209493160247802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,7168,0.013684266805648803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,2048,0.006840533514817555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,4096,0.00974826713403066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,5120,0.01127893328666687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,6144,0.012504532933235168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,3072,0.008044800162315369
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,3584,0.009195733070373534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,2560,0.00763733337322871
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,1024,0.005667200187842051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,1536,0.006431999802589417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,128,0.004584533472855886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,256,0.004851200183232625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,768,0.0053941334287325535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,512,0.005038933455944061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,16384,0.024065067370732628
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,12288,0.01945919990539551
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,65536,0.08153173128763834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,10240,0.017087999979654947
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,8192,0.014735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,7168,0.013588266571362815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,6144,0.012170666456222534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,5120,0.010545066992441813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,2048,0.006818133095900218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,2560,0.007674666742483775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,4096,0.009147733449935913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,3584,0.009040000041325887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,512,0.0051242664456367494
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,3072,0.007964799801508587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,128,0.0045514668027559916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,1536,0.006525866687297821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,16384,0.02408533294995626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,1024,0.0056991999348004665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,768,0.005295999844868978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,256,0.004772266745567322
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,7168,0.013136000434557597
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,12288,0.019348265727361043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,10240,0.016965333620707193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,8192,0.014544000228246054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,65536,0.08126933574676513
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,6144,0.01154026687145233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,5120,0.010361599922180175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,3072,0.0079925333460172
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,4096,0.009131733576456707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,3584,0.009038933118184407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,1536,0.006495999793211619
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,2560,0.007650133470694225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,2048,0.006807466844717662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,1024,0.005681066711743673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,768,0.005286400020122528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,512,0.004983466863632202
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,256,0.004763733347256978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,128,0.00461760014295578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,10240,0.016665599743525186
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,65536,0.08114453156789145
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,8192,0.014206932981808982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,16384,0.023915733893712363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,12288,0.01922453244527181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,7168,0.012743467092514038
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,5120,0.010361599922180175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,6144,0.011498666803042094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,2560,0.007650133470694225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,4096,0.009172266721725464
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,3584,0.008961066603660583
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,3072,0.007961600025494894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,2048,0.006783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,128,0.004548266530036926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,1536,0.006443733473618825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,1024,0.005679999788602194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,768,0.0052490666508674625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,512,0.004966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,256,0.004738133152325948
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,65536,0.08089706897735596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,16384,0.023704532782236734
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,12288,0.01881493330001831
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,10240,0.016331733266512553
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,8192,0.013851733009020487
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,7168,0.012706133723258971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,5120,0.010416000088055929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,6144,0.01146666705608368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,3584,0.00890239973862966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,4096,0.009131733576456707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,3072,0.00795413355032603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,2048,0.006826666494210561
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,2560,0.0076106667518615724
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,1024,0.00561599979797999
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,1536,0.006443733473618825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,768,0.005169066786766052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,8192,0.013729066650072733
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,512,0.004969599843025208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,256,0.004580266773700714
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,128,0.004506666461626689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,65536,0.08067306677500406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,16384,0.023201066255569457
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,12288,0.018439465761184694
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,10240,0.016123732924461363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,4096,0.009082667032877604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,7168,0.012571733196576437
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,5120,0.010261332988739014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,6144,0.011479467153549194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,3584,0.00888320008913676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,3072,0.007905066510041555
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,2560,0.007561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,2048,0.00673173318306605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,1536,0.0064074665307998655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,512,0.004937600096066793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,1024,0.005596800148487091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,768,0.005198933184146881
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,256,0.004761599997679392
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,128,0.004369066655635833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,12288,0.018474666277567546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,16384,0.0231221338113149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,65536,0.0797322670618693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,10240,0.01613653302192688
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,8192,0.013745066523551942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,2048,0.006705066561698914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,7168,0.012617599964141846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,6144,0.011475200454394024
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,5120,0.010289067029953003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,4096,0.009110400080680847
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,3072,0.007904000083605449
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,2560,0.0075647999842961625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,3584,0.008746666709582011
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,512,0.004841599861780802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,256,0.004666666686534882
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,1536,0.006417066852251689
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,768,0.005268266797065735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,1024,0.005644799768924713
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,128,0.004436266422271728
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,4096,0.06820159753163656
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,6144,0.10110080242156982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,5120,0.08436480363210043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,7168,0.11451733112335205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,8192,0.13072746594746906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,10240,0.16213653882344564
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,3584,0.060598401228586826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,2048,0.03781439860661824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,3072,0.05273280143737793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,2560,0.04492799838383992
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,1024,0.023040000597635904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,128,0.008424533406893413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,1536,0.031830400228500366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,768,0.018447999159495035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,12288,0.1919413407643636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,512,0.01447466711203257
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,256,0.011009066303571065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,6144,0.029539199670155843
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,8192,0.04055146773656209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,10240,0.04518506526947021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,16384,0.2561471939086914
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,7168,0.03320639928181966
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,12288,0.05288639863332113
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,16384,0.06901013056437175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,5120,0.025597866376241046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,4096,0.021337600549062093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,3584,0.019590399662653604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,3072,0.01877440015474955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,2048,0.013145599762598673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,2560,0.015552000204722086
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,128,0.004866133133570353
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,512,0.006077866752942403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,1536,0.011211733023325603
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,1024,0.009241599837938945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,768,0.0083146666487058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,256,0.005388799806435903
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,10240,0.036806400616963705
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,8192,0.03023359974225362
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,12288,0.0431007981300354
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,6144,0.024014933904012045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,7168,0.027061333258946735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,16384,0.05591359933217367
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,3584,0.01665173371632894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,5120,0.021421867609024047
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,4096,0.018092799186706542
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,2560,0.013480533162752786
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,3072,0.01490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,2048,0.011441066861152649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,1024,0.008183466891447704
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,768,0.007146666447321574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,512,0.005645866692066193
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,128,0.0047765334447224935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,1536,0.010033067067464192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,65536,0.2560362656911214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,256,0.005173333485921224
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,7168,0.025196800629297893
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,10240,0.03236053387324016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,16384,0.045949868361155194
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,6144,0.02132693330446879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,8192,0.02648746569951375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,12288,0.0373802661895752
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,5120,0.019215999046961467
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,3072,0.013364266355832419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,2560,0.012071466445922852
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,4096,0.016556800405184428
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,65536,0.20084373156229654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,3584,0.014711466431617738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,1024,0.007437866429487865
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,768,0.006161066889762879
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,1536,0.009268266956011455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,2048,0.010554666320482891
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,128,0.004740266501903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,512,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,256,0.005038933455944061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,12288,0.03160960078239441
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,10240,0.026548266410827637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,6144,0.01863893270492554
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,16384,0.04106239875157674
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,7168,0.02097919980684916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,8192,0.022462934255599976
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,5120,0.016477866967519125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,65536,0.17740052541097004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,2560,0.010620799660682679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,3584,0.01295360028743744
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,1536,0.008547199765841167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,4096,0.014108799894650779
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,3072,0.011815466483434041
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,1024,0.006427733103434245
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,768,0.0059797331690788266
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,2048,0.009393067161242167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,128,0.004600533346335093
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,256,0.004970666766166687
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,512,0.005314133564631144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,12288,0.0280021329720815
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,65536,0.14150187174479167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,16384,0.03583679993947347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,10240,0.023825067281723022
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,6144,0.016777600844701132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,3584,0.011913599570592244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,5120,0.014991999665896098
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,8192,0.020703999201456706
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,7168,0.018806399901707967
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,4096,0.013745066523551942
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,1536,0.008106666803359985
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,1024,0.006233599781990051
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,3072,0.010709333419799804
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,768,0.005772800246874491
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,256,0.0050357331832249965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,2560,0.010007466872533162
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,2048,0.009019733468691508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,512,0.005430399874846141
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,128,0.004632533093293508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,65536,0.12511040369669596
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,12288,0.02542080084482829
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,16384,0.032764800389607746
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,10240,0.02205866575241089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,8192,0.018915200233459474
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,7168,0.017378133535385133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,6144,0.015332266688346863
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,2048,0.00860800047715505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,4096,0.012299733360608418
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,5120,0.014505599935849508
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,3584,0.011228799819946289
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,3072,0.010146133104960124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,2560,0.009485866626103718
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,1536,0.007779199878374736
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,1024,0.006108800073464712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,65536,0.10688640276590984
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,256,0.0048096001148223875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,512,0.005275733272234599
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,768,0.005703466633955637
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,128,0.0045738667249679565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,6144,0.014452266693115234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,16384,0.029361067215601604
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,8192,0.01758293310801188
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,5120,0.012533332904179892
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,10240,0.021681066354115805
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,12288,0.02325119972229004
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,7168,0.015862400333086647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,4096,0.011413333813349406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,3584,0.010918399691581726
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,3072,0.009845333298047383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,2048,0.008201600114504496
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,768,0.005599999924500784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,512,0.005211733281612396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,128,0.004622933268547058
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,2560,0.009084799885749817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,1536,0.006860800087451935
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,1024,0.005865600208441416
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,65536,0.10003626346588135
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,256,0.004811733464399974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,16384,0.027395200729370118
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,8192,0.01635199983914693
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,7168,0.014841600259145101
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,12288,0.02077546715736389
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,6144,0.013879467050234476
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,5120,0.012291199962298075
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,10240,0.018294399976730345
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,2560,0.008870399991671244
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,4096,0.010995200276374817
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,3584,0.010264533758163451
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,768,0.005335466563701629
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,3072,0.009462400277455648
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,256,0.004867200056711833
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,1536,0.006628266473611195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,2048,0.007645866771539052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,1024,0.005686399837334951
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,65536,0.09153280258178711
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,512,0.005097599824269613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,7168,0.015227733055750528
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,128,0.004613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,10240,0.019003732999165853
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,16384,0.026679466168085735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,4096,0.010406399766604107
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,8192,0.015205333630243937
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,12288,0.021655466159184775
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,6144,0.013238400220870972
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,3072,0.009432533383369445
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,2560,0.008108800152937572
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,5120,0.012158933281898498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,2048,0.007018666466077168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,1024,0.005758933226267497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,3584,0.010296533505121868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,65536,0.09051199754079184
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,1536,0.006543999910354615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,768,0.005479466418425242
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,512,0.005141333242257436
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,256,0.004923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,128,0.004554666578769684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,12288,0.020104533433914183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,10240,0.018049067258834837
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,5120,0.01143999993801117
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,16384,0.025693867603937787
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,8192,0.015436800320943198
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,7168,0.014006400108337402
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,6144,0.012956800063451132
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,4096,0.010364799698193868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,1024,0.005734399954477946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,65536,0.08391573429107665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,3584,0.0101173331340154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,3072,0.008419199784596761
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,2048,0.00709440012772878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,2560,0.008220799763997396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,1536,0.006525866687297821
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,768,0.005400533477465311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,512,0.005118933320045471
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,256,0.004775466521581014
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,128,0.004520533482233683
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,10240,0.01730026602745056
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,12288,0.020126932859420778
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,6144,0.012744533022244773
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,16384,0.02482346693674723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,8192,0.015315199891726175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,7168,0.013991467157999673
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,5120,0.011592533191045125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,4096,0.010116266210873921
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,65536,0.08163306713104249
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,3584,0.009543466567993163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,3072,0.008392533659934998
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,2560,0.00784746656815211
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,768,0.0053045332431793215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,2048,0.006888533135255177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,1536,0.006492800017197926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,512,0.0050346667567888895
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,1024,0.005710933109124502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,128,0.004531200230121613
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,256,0.0047082667549451195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,12288,0.019573332866032918
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,10240,0.017249067624409996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,16384,0.024322134256362916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,8192,0.014728533228238425
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,7168,0.013739732901255288
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,6144,0.012446932991345723
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,5120,0.011205333471298217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,3072,0.008089600006739299
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,4096,0.00976639986038208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,2560,0.007830399771531422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,65536,0.08189760049184164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,1536,0.0063967997829119366
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,768,0.005295999844868978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,512,0.005126399795214335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,3584,0.009100799759229023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,2048,0.0068245331446329755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,1024,0.00559146652619044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,12288,0.01942080060640971
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,128,0.0046079998215039575
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,256,0.004740266501903534
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,16384,0.024193066358566283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,10240,0.0170741339524587
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,6144,0.012225066622098286
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,8192,0.014711466431617738
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,4096,0.009283199906349182
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,7168,0.013563733299573263
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,65536,0.08145493666330973
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,5120,0.010949333508809406
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,3584,0.009031466643015544
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,1024,0.0056415999929110205
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,768,0.005221333106358846
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,3072,0.007947733501593272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,2048,0.006774400174617767
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,2560,0.00767573316891988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,1536,0.006562133133411407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,12288,0.019324799378712974
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,512,0.005022933085759481
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,8192,0.014317867159843446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,256,0.004801066716512045
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,128,0.004470400015513102
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,65536,0.08105706373850505
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,16384,0.02405440012613932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,10240,0.0169322669506073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,4096,0.009243733684221904
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,7168,0.012910933295885722
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,1536,0.006479999919732411
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,5120,0.010286933183670044
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,6144,0.01146346628665924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,3072,0.007949866851170858
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,3584,0.008933333555857341
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,2560,0.007608533402283986
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,2048,0.006772266825040181
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,1024,0.005596800148487091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,768,0.005285333096981049
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,512,0.005019733309745788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,256,0.004694400231043497
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,128,0.00454720010360082
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,10240,0.016497066617012023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,16384,0.023756800095240276
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,12288,0.018969599405924478
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,7168,0.01297599971294403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,65536,0.08093760013580323
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,3584,0.008936533331871032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,8192,0.01386666695276896
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,2560,0.007606400052706401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,6144,0.011453866958618164
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,5120,0.010321066776911417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,4096,0.009149866302808125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,1024,0.005683200061321258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,3072,0.007940266529719036
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,2048,0.006763733426729838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,1536,0.006446933249632518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,16384,0.02365866700808207
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,10240,0.016407466928164163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,512,0.005003733436266581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,768,0.005239466826121012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,256,0.004657066861788432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,12288,0.01889386574427287
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,128,0.004444799820582072
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,65536,0.08087680339813233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,6144,0.011441066861152649
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,4096,0.009079466263453167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,8192,0.013773866494496665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,7168,0.012651733557383218
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,2048,0.006740266581376393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,5120,0.010253866513570149
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,3584,0.008905599514643352
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,3072,0.007931733131408691
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,1536,0.00639466643333435
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,2560,0.007654400169849395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,1024,0.005578666428724925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,512,0.004923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,768,0.005177600185076395
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,256,0.0045738667249679565
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,128,0.004363733530044556
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,16384,0.023194666703542074
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,65536,0.08040853341420492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,12288,0.01845973332722982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,10240,0.016060800353686015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,8192,0.013733333349227906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,5120,0.010244266192118327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,3584,0.008806399504343669
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,7168,0.01263146698474884
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,2560,0.0075445334116617845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,6144,0.011389866471290588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,4096,0.009027199943860371
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,3072,0.007975466549396515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,2048,0.006680533289909363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,1024,0.005571199953556061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,1536,0.006358399987220764
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,768,0.005176533261934916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,512,0.004903466502825419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,256,0.004555733501911163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,128,0.004411733150482178
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,65536,0.07939840157826741
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,8192,0.013726933797200521
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,16384,0.02318933407465617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,3584,0.008743466933568318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,12288,0.01844373345375061
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,10240,0.0160671999057134
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,7168,0.012609066565831504
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,4096,0.009099733829498292
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,5120,0.010221866766611735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,6144,0.011354666948318482
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,1024,0.005554133156935374
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,3072,0.007863466441631318
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,768,0.005201066533724466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,2560,0.007538133362929027
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,2048,0.006679466863473256
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,1536,0.006439466774463653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,128,0.004427733520666758
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,512,0.004869333406289419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,256,0.004545066754023234
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,5120,0.08365226586659749
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,4096,0.06838719844818116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,7168,0.11481280326843261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,6144,0.09939093589782715
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,8192,0.13074560165405275
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,3072,0.0526581327120463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,10240,0.16163093249003094
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,2048,0.03687573273976644
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,2560,0.04554239908854167
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,3584,0.06037439902623495
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,1536,0.03141866723696391
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,768,0.01912426749865214
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,12288,0.19305493036905924
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,512,0.014406399925549826
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,256,0.01097813347975413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,1024,0.02177919944127401
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,128,0.008612266182899475
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,16384,0.25573546091715493
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,12288,0.05838293234507243
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,7168,0.033056000868479415
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,10240,0.045108266671498615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,8192,0.037247999509175615
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,6144,0.02910826603571574
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,4096,0.021296000480651854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,16384,0.0688927968343099
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,3584,0.019428267081578573
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,5120,0.027405865987141925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,2560,0.015440000096956888
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,2048,0.01300266683101654
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,3072,0.01754986643791199
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,768,0.008270933230717977
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,1536,0.011397332946459452
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,1024,0.009145599603652955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,512,0.006402133405208588
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,128,0.004829866687456766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,256,0.005190399785836538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,16384,0.05436586538950602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,7168,0.027747199932734175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,10240,0.03684373299280803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,12288,0.04272213379542033
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,6144,0.024378667275110878
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,8192,0.030245333909988403
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,4096,0.01783999999364217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,5120,0.02103786667188009
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,3072,0.014661332964897156
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,768,0.0073066666722297665
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,1024,0.008094933132330577
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,3584,0.0168778657913208
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,2560,0.013446399569511413
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,2048,0.011412266890207927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,1536,0.009944533308347065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,128,0.004747733473777771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,65536,0.2562922636667887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,512,0.005678933362166087
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,256,0.005042133231957754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,10240,0.03227733373641968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,12288,0.03652906815210978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,6144,0.021389865875244142
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,8192,0.0295199990272522
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,16384,0.04814933141072591
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,7168,0.02347093423207601
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,5120,0.018776534001032512
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,3072,0.013131733735402426
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,3584,0.014405333002408347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,4096,0.016532267133394875
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,2560,0.012154666582743327
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,65536,0.2072447935740153
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,2048,0.010501333077748616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,256,0.005043200155099233
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,1536,0.009199999769528707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,1024,0.007620266576608021
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,512,0.005551999807357788
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,768,0.006281599899133046
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,128,0.004725333551565806
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,10240,0.027036799987157183
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,12288,0.030776532491048177
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,8192,0.022835199038187662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,7168,0.02028053402900696
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,65536,0.1767946720123291
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,16384,0.039332266648610434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,6144,0.018313600619633993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,5120,0.016697599490483602
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,3584,0.012471466263135274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,4096,0.014189866185188294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,3072,0.011529599626859028
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,2560,0.011297067006429035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,2048,0.009324799974759419
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,1536,0.008504533767700195
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,1024,0.006320000191529592
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,768,0.0059338668982187905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,256,0.004891733328501383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,512,0.005392000079154968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,128,0.004594133297602335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,65536,0.13481814066569012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,12288,0.02833706736564636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,16384,0.036055465539296463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,4096,0.01288640002409617
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,5120,0.015809067090352378
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,8192,0.02106026609738668
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,10240,0.024385066827138265
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,3584,0.011945600310961407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,7168,0.01885653336842855
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,6144,0.016727467377980552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,2560,0.010013866424560546
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,2048,0.008901333808898926
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,768,0.0058335999647776285
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,3072,0.010773332913716634
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,1536,0.008062933385372163
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,1024,0.006129066646099091
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,256,0.0049685334165891016
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,512,0.005329066514968872
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,128,0.004602666695912679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,16384,0.0319925328095754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,8192,0.018662399053573607
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,12288,0.025250132878621417
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,65536,0.12529173692067463
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,10240,0.022056533892949422
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,7168,0.017032533884048462
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,6144,0.015169066190719605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,5120,0.014072533448537192
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,4096,0.011686399579048157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,3584,0.011077333490053813
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,1536,0.007095466554164887
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,3072,0.010619733730951946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,2560,0.009542399644851684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,2048,0.00848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,768,0.005447466671466827
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,1024,0.0060479998588562015
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,65536,0.1114847977956136
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,512,0.005245866874853769
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,256,0.004807466765244802
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,128,0.0046293333172798155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,10240,0.020526933670043945
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,16384,0.02881493369738261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,5120,0.012689066926638284
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,12288,0.022300799687703453
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,6144,0.013948800166447959
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,3072,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,8192,0.018490666151046754
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,7168,0.015633066495259605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,4096,0.011342933773994446
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,1536,0.006829866766929626
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,3584,0.010818133751551311
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,512,0.005169066786766052
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,2560,0.009090133508046468
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,1024,0.005819733440876007
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,128,0.004474666714668274
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,2048,0.008050133287906647
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,768,0.005588266750176748
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,65536,0.0991487979888916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,256,0.004877866804599762
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,10240,0.018822399775187175
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,6144,0.013843199610710144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,16384,0.025565866629282636
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,7168,0.014605866869290671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,12288,0.02167466680208842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,8192,0.015599999825159708
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,5120,0.012197333574295043
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,3584,0.010309333602587383
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,2048,0.007573333382606506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,1536,0.00658240020275116
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,4096,0.010844799876213073
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,512,0.005065600077311197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,1024,0.005690666536490122
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,65536,0.09179519812266032
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,3072,0.00920853316783905
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,2560,0.00883840024471283
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,768,0.005412266651789347
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,10240,0.017803732554117838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,8192,0.015945600469907124
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,12288,0.02235306700070699
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,256,0.004726399978001913
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,128,0.004569600025812784
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,16384,0.02656746705373128
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,5120,0.01206826666990916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,7168,0.014712533354759217
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,6144,0.012776533762613932
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,4096,0.010763733585675558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,3584,0.009955199559529622
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,3072,0.009351467092831928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,65536,0.09028586546579996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,2560,0.008467200398445129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,2048,0.0073077330986658735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,128,0.004587733248869578
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,768,0.005377066632111868
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,512,0.004966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,1536,0.00651093324025472
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,1024,0.005751466751098633
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,256,0.0048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,10240,0.01798080007235209
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,6144,0.012746666868527731
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,8192,0.015007999539375306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,3584,0.009895466764767965
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,12288,0.02014933427174886
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,7168,0.014098133643468222
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,16384,0.025047467152277632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,2560,0.007974400122960409
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,4096,0.010315733154614766
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,5120,0.011735467116038005
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,3072,0.008745599786440532
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,1536,0.006532266736030579
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,768,0.005307733515898387
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,65536,0.08363306522369385
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,256,0.004805333415667216
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,2048,0.007124266525109608
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,1024,0.005700266857941946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,512,0.0049792001644770306
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,128,0.004560000201066335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,10240,0.017474132776260375
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,12288,0.019937066237131755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,16384,0.024689066410064697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,8192,0.015126400192578635
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,7168,0.014008532961209616
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,6144,0.012705066800117492
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,65536,0.08354667027791342
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,3584,0.009875200192133586
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,5120,0.01136959989865621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,4096,0.010190932949384054
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,1536,0.006461866696675618
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,3072,0.00821973333756129
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,2560,0.007846400141716003
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,512,0.0049674664934476215
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,256,0.0047872001926104225
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,2048,0.0070165331164995836
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,768,0.005425066749254862
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,1024,0.005746133128801982
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,12288,0.019427200158437095
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,128,0.004503466685612996
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,6144,0.012468266487121581
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,4096,0.009717333316802978
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,10240,0.017246933778127034
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,16384,0.024555732806523643
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,65536,0.08173333009084066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,8192,0.014845866958300272
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,7168,0.013714133699735006
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,2048,0.006786133348941803
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,5120,0.011084799965222675
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,1024,0.005568000177542368
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,768,0.0053375999132792154
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,3584,0.009045333663622538
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,3072,0.008147199948628742
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,2560,0.007646933197975159
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,256,0.004673066735267639
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,1536,0.006442666550477346
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,128,0.004494933287302653
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,12288,0.019372800985972084
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,512,0.005063466727733612
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,4096,0.00921066701412201
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,16384,0.0240938663482666
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,65536,0.0812053362528483
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,10240,0.017029333114624023
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,7168,0.013589333494504294
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,8192,0.014605866869290671
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,3584,0.008921600381533305
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,6144,0.01206826666990916
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,5120,0.01076693336168925
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,2560,0.007607466479142506
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,256,0.004644266764322917
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,3072,0.007946666578451793
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,2048,0.006734933455785115
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,1536,0.006454400221506755
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,768,0.005234133203824361
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,1024,0.00560746689637502
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,512,0.004964266717433929
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,128,0.004522666831811269
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,16384,0.02397866646448771
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,10240,0.016898133357365928
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,8192,0.014441600441932679
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,12288,0.01927679975827535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,7168,0.01286186675230662
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,65536,0.08108906745910645
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,6144,0.01145919958750407
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,5120,0.010300800204277039
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,3584,0.008941866954167684
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,2560,0.007565866907437642
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,2048,0.006723199784755707
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,512,0.004990933338801066
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,4096,0.009107200304667155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,3072,0.007995733122030894
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,1536,0.006378666559855144
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,1024,0.005605333546797434
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,768,0.005202133456865946
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,10240,0.016501333316167197
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,256,0.004691199958324432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,128,0.004564266900221507
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,16384,0.023947733640670776
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,12288,0.0190175990263621
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,65536,0.08093653519948324
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,8192,0.013797332843144735
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,7168,0.012657066186269125
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,6144,0.011474133531252543
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,5120,0.010270933310190838
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,4096,0.009111467003822326
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,3072,0.007909333209196727
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,3584,0.00888320008913676
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,2560,0.007658666869004567
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,512,0.004940799872080485
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,2048,0.006704000135262807
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,1536,0.006424533327420552
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,1024,0.005570133527119955
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,768,0.005166933437188466
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,65536,0.0807477315266927
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,256,0.004682666560014089
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,8192,0.013768532872200012
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,128,0.004499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,7168,0.012590932846069335
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,16384,0.023670399188995363
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,10240,0.0164000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,12288,0.01876373291015625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,3584,0.008855467041333515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,6144,0.011520000298817952
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,3072,0.007893333335717519
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,5120,0.010226133465766906
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,2048,0.006673066814740498
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,4096,0.0090421328941981
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,1536,0.006342400113741558
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,1024,0.005565866827964783
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,768,0.005211733281612396
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,2560,0.0075573335091273
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,65536,0.0805728038152059
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,12288,0.018450133005777993
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,16384,0.023098667462666832
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,512,0.0049216002225875854
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,8192,0.013693867127100625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,256,0.0046528001626332605
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,128,0.004448000093301137
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,10240,0.016190933187802632
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,5120,0.010188800096511842
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,7168,0.012596266468365988
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,2560,0.0075093333919843035
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,6144,0.011371733744939168
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,4096,0.009108266234397889
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,3584,0.008728532989819845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,3072,0.007868800063927968
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,1024,0.00550186683734258
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,2048,0.006740266581376393
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,512,0.004874666531880697
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,1536,0.0063178668419520065
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,12288,0.018453333775202432
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,768,0.005153066913286845
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,256,0.0046015997727712
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,65536,0.07952000300089518
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,128,0.004403199752171834
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,16384,0.02309760053952535
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,10240,0.016039466857910155
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,8192,0.013724799950917563
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,4096,0.009010133147239686
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,5120,0.01023466686407725
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,7168,0.01264639993508657
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,6144,0.011405866344769795
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,768,0.00514026681582133
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,1024,0.005533866584300995
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,3584,0.008711466193199157
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,3072,0.007874133189519246
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,256,0.0046410664916038515
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,2560,0.007490133245786031
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,2048,0.006631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,1536,0.0063285330931345625
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,512,0.004881066580613455
TRTLLM,1.0.0rc3,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,128,0.0044383997718493145
